The purpose of this file is to practice applying basic R commands learned through lecture videos and perform exploratory analysis on worldwide COVID-19 data.
#Load libraries needed.
library(easypackages)
libraries("rgdal", "gdalUtils", "raster", "maptools", "tigris", "ggplot2", "cowplot", "dplyr", "sp", "sf", "rgdal", "stringr", "RColorBrewer", "ggcorrplot", "pander", "forcats", "ggsn", "mapproj", "rlist", "lubridate")#Reading in world countries shapefile.
data(wrld_simpl)
wrld_simpl@data[["NAME"]]## [1] Antigua and Barbuda
## [2] Algeria
## [3] Azerbaijan
## [4] Albania
## [5] Armenia
## [6] Angola
## [7] American Samoa
## [8] Argentina
## [9] Australia
## [10] Bahrain
## [11] Barbados
## [12] Bermuda
## [13] Bahamas
## [14] Bangladesh
## [15] Belize
## [16] Bosnia and Herzegovina
## [17] Bolivia
## [18] Burma
## [19] Benin
## [20] Solomon Islands
## [21] Brazil
## [22] Bulgaria
## [23] Brunei Darussalam
## [24] Canada
## [25] Cambodia
## [26] Sri Lanka
## [27] Congo
## [28] Democratic Republic of the Congo
## [29] Burundi
## [30] China
## [31] Afghanistan
## [32] Bhutan
## [33] Chile
## [34] Cayman Islands
## [35] Cameroon
## [36] Chad
## [37] Comoros
## [38] Colombia
## [39] Costa Rica
## [40] Central African Republic
## [41] Cuba
## [42] Cape Verde
## [43] Cook Islands
## [44] Cyprus
## [45] Denmark
## [46] Djibouti
## [47] Dominica
## [48] Dominican Republic
## [49] Ecuador
## [50] Egypt
## [51] Ireland
## [52] Equatorial Guinea
## [53] Estonia
## [54] Eritrea
## [55] El Salvador
## [56] Ethiopia
## [57] Austria
## [58] Czech Republic
## [59] French Guiana
## [60] Finland
## [61] Fiji
## [62] Falkland Islands (Malvinas)
## [63] Micronesia, Federated States of
## [64] French Polynesia
## [65] France
## [66] Gambia
## [67] Gabon
## [68] Georgia
## [69] Ghana
## [70] Grenada
## [71] Greenland
## [72] Germany
## [73] Guam
## [74] Greece
## [75] Guatemala
## [76] Guinea
## [77] Guyana
## [78] Haiti
## [79] Honduras
## [80] Croatia
## [81] Hungary
## [82] Iceland
## [83] India
## [84] Iran (Islamic Republic of)
## [85] Israel
## [86] Italy
## [87] Cote d'Ivoire
## [88] Iraq
## [89] Japan
## [90] Jamaica
## [91] Jordan
## [92] Kenya
## [93] Kyrgyzstan
## [94] Korea, Democratic People's Republic of
## [95] Kiribati
## [96] Korea, Republic of
## [97] Kuwait
## [98] Kazakhstan
## [99] Lao People's Democratic Republic
## [100] Lebanon
## [101] Latvia
## [102] Belarus
## [103] Lithuania
## [104] Liberia
## [105] Slovakia
## [106] Liechtenstein
## [107] Libyan Arab Jamahiriya
## [108] Madagascar
## [109] Martinique
## [110] Mongolia
## [111] Montserrat
## [112] The former Yugoslav Republic of Macedonia
## [113] Mali
## [114] Morocco
## [115] Mauritius
## [116] Mauritania
## [117] Malta
## [118] Oman
## [119] Maldives
## [120] Mexico
## [121] Malaysia
## [122] Mozambique
## [123] Malawi
## [124] New Caledonia
## [125] Niue
## [126] Niger
## [127] Aruba
## [128] Anguilla
## [129] Belgium
## [130] Hong Kong
## [131] Northern Mariana Islands
## [132] Faroe Islands
## [133] Andorra
## [134] Gibraltar
## [135] Isle of Man
## [136] Luxembourg
## [137] Macau
## [138] Monaco
## [139] Palestine
## [140] Montenegro
## [141] Mayotte
## [142] Aaland Islands
## [143] Norfolk Island
## [144] Cocos (Keeling) Islands
## [145] Antarctica
## [146] Bouvet Island
## [147] French Southern and Antarctic Lands
## [148] Heard Island and McDonald Islands
## [149] British Indian Ocean Territory
## [150] Christmas Island
## [151] United States Minor Outlying Islands
## [152] Vanuatu
## [153] Nigeria
## [154] Netherlands
## [155] Norway
## [156] Nepal
## [157] Nauru
## [158] Suriname
## [159] Nicaragua
## [160] New Zealand
## [161] Paraguay
## [162] Peru
## [163] Pakistan
## [164] Poland
## [165] Panama
## [166] Portugal
## [167] Papua New Guinea
## [168] Guinea-Bissau
## [169] Qatar
## [170] Reunion
## [171] Romania
## [172] Republic of Moldova
## [173] Philippines
## [174] Puerto Rico
## [175] Russia
## [176] Rwanda
## [177] Saudi Arabia
## [178] Saint Kitts and Nevis
## [179] Seychelles
## [180] South Africa
## [181] Lesotho
## [182] Botswana
## [183] Senegal
## [184] Slovenia
## [185] Sierra Leone
## [186] Singapore
## [187] Somalia
## [188] Spain
## [189] Saint Lucia
## [190] Sudan
## [191] Sweden
## [192] Syrian Arab Republic
## [193] Switzerland
## [194] Trinidad and Tobago
## [195] Thailand
## [196] Tajikistan
## [197] Tokelau
## [198] Tonga
## [199] Togo
## [200] Sao Tome and Principe
## [201] Tunisia
## [202] Turkey
## [203] Tuvalu
## [204] Turkmenistan
## [205] United Republic of Tanzania
## [206] Uganda
## [207] United Kingdom
## [208] Ukraine
## [209] United States
## [210] Burkina Faso
## [211] Uruguay
## [212] Uzbekistan
## [213] Saint Vincent and the Grenadines
## [214] Venezuela
## [215] British Virgin Islands
## [216] Viet Nam
## [217] United States Virgin Islands
## [218] Namibia
## [219] Wallis and Futuna Islands
## [220] Samoa
## [221] Swaziland
## [222] Yemen
## [223] Zambia
## [224] Zimbabwe
## [225] Indonesia
## [226] Guadeloupe
## [227] Netherlands Antilles
## [228] United Arab Emirates
## [229] Timor-Leste
## [230] Pitcairn Islands
## [231] Palau
## [232] Marshall Islands
## [233] Saint Pierre and Miquelon
## [234] Saint Helena
## [235] San Marino
## [236] Turks and Caicos Islands
## [237] Western Sahara
## [238] Serbia
## [239] Holy See (Vatican City)
## [240] Svalbard
## [241] Saint Martin
## [242] Saint Barthelemy
## [243] Guernsey
## [244] Jersey
## [245] South Georgia South Sandwich Islands
## [246] Taiwan
## 246 Levels: Aaland Islands Afghanistan Albania Algeria ... Zimbabwe
plot(wrld_simpl) #Filtering COVID data (from COVID exploratory analysis.)
owid <- read.csv(url("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"), na.strings = c("", "NA"), colClasses = c("date" = "Date"), header = TRUE)
owid.filtered <- owid %>%
rename(iso.code = iso_code, totcases = total_cases, totdeaths = total_deaths, totcases.mil = total_cases_per_million, totdeaths.mil = total_deaths_per_million, pop.density = population_density, med.age = median_age, older70 = aged_70_older, gdp.cap = gdp_per_capita, card.death = cardiovasc_death_rate, diab.prev = diabetes_prevalence, fem.smokers = female_smokers, male.smokers = male_smokers) %>%
select(c(iso.code:totcases, totdeaths, totcases.mil, totdeaths.mil, population, pop.density, med.age, older70, gdp.cap, card.death, diab.prev, fem.smokers, male.smokers)) %>% #Renamed variables and filtered to keep only those of interest.
filter(date == "2021-06-28") #Filtered out all but most recent date (most recent date is a running sum of total cases).
owid.countries <- owid.filtered %>%
filter(!is.na(continent)) #Removed all rows with NA for continent, leaving behind only countries. setwd("./World_Rasters")
left <- raster('Left_Half_World_NO2.tif') %>%
flip(direction='y')
right <- raster('Right_Half_World_NO2.tif') %>%
flip(direction='y')
x <- list(left, right)
names(x)[1:2] <- c('x', 'y')
x$fun <- mean
x$na.rm <- TRUE
world.NO2.r <- do.call(mosaic, x)
rm(left, right, x)
#Converting to data frame for plotting.
r.crop <- crop(world.NO2.r, extent(wrld_simpl))
r.crop <- mask(r.crop, wrld_simpl)
r.pts <- rasterToPoints(r.crop, spatial = TRUE)
world.NO2.df <- data.frame(r.pts)
rm(r.pts, r.crop)
myPalette <- colorRampPalette(brewer.pal(9, "YlOrRd"))
my_theme <- function() {
theme_minimal() + # shorthand for white background color
theme(axis.line = element_blank(), # further customization of theme components
axis.text = element_blank(), # remove x and y axis text and labels
axis.title = element_blank(),
panel.grid = element_line(color = "white"), # make grid lines invisible
legend.key.size = unit(0.5, "cm"), # increase size of legend
legend.text = element_text(size = 7), # increase legend text size
legend.title = element_text(size = 7)) # increase legend title size
}
ggplot() +
geom_raster(data = world.NO2.df, aes(x = x, y = y, fill = layer)) +
geom_polygon(data = wrld_simpl, aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
my_theme() +
ggtitle("World NO2 Concentrations 07-10-2018 to 07-10-2019") +
#scale_fill_manual(breaks = c(0, 0.0000025, 0.000005, 0.0000075, 0.00001, 0.00002)) +
scale_fill_gradientn(name = "NO2 Concentration \n(mol/m^2)", colours = myPalette(100)) +
north(x.min = -180, y.min = -90, x.max = 180, y.max = 90, anchor = c(x=-140, y=-35), symbol=12) +
scalebar(x.min = -180, y.min = -90, x.max = 180, y.max = 90, dist = 1000, dist_unit = 'km', transform = TRUE, anchor = c(x=-140, y=-60), st.size = 2, model = 'WGS84') +
coord_equal()#R base plots.
plot(world.NO2.r)
plot(wrld_simpl, add = TRUE)wrld_simpl@data$NAME <- as.character(wrld_simpl@data$NAME)
#Changing shapefile country names to match COVID dataset:
wrld_simpl@data$NAME[wrld_simpl$NAME=="Brunei Darussalam"] <- "Brunei"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Czech Republic"] <- "Czechia"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Democratic Republic of the Congo"] <- "Democratic Republic of Congo"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Faroe Islands"] <- "Faeroe Islands"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Iran (Islamic Republic of)"] <- "Iran"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Lao People's Democratic Republic"] <- "Laos"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Libyan Arab Jamahiriya"] <- "Libya"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Republic of Moldova"] <- "Moldova"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Burma"] <- "Myanmar"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Korea, Republic of"] <- "South Korea"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Korea, Democratic People's Republic of"] <- "North Korea"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Syrian Arab Republic"] <- "Syria"
wrld_simpl@data$NAME[wrld_simpl$NAME=="The former Yugoslav Republic of Macedonia"] <- "North Macedonia"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Macau"] <- "Macao"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Micronesia, Federated States of"] <- "Micronesia (country)"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Timor-Leste"] <- "Timor"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Holy See (Vatican City)"] <- "Vatican"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Viet Nam"] <- "Vietnam"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Wallis and Futuna Islands"] <- "Wallis and Futuna"
#Missing from shapefiles but in OWID data: Kosovo, Tanzania, Eswatini
owid.countries$location[!(owid.countries$location %in% wrld_simpl@data$NAME)]## [1] "Curacao" "Eswatini" "Kosovo" "South Sudan" "Tanzania"
#Missing from OWID data but in shapefiles:
wrld_simpl@data$NAME[!(wrld_simpl@data$NAME%in% owid.countries$location)]## [1] "American Samoa"
## [2] "French Guiana"
## [3] "Falkland Islands (Malvinas)"
## [4] "Guam"
## [5] "North Korea"
## [6] "Martinique"
## [7] "Montserrat"
## [8] "Niue"
## [9] "Anguilla"
## [10] "Northern Mariana Islands"
## [11] "Mayotte"
## [12] "Aaland Islands"
## [13] "Norfolk Island"
## [14] "Cocos (Keeling) Islands"
## [15] "Antarctica"
## [16] "Bouvet Island"
## [17] "French Southern and Antarctic Lands"
## [18] "Heard Island and McDonald Islands"
## [19] "British Indian Ocean Territory"
## [20] "Christmas Island"
## [21] "United States Minor Outlying Islands"
## [22] "Reunion"
## [23] "Puerto Rico"
## [24] "Tokelau"
## [25] "Tonga"
## [26] "Tuvalu"
## [27] "Turkmenistan"
## [28] "United Republic of Tanzania"
## [29] "British Virgin Islands"
## [30] "United States Virgin Islands"
## [31] "Swaziland"
## [32] "Guadeloupe"
## [33] "Netherlands Antilles"
## [34] "Pitcairn Islands"
## [35] "Palau"
## [36] "Saint Pierre and Miquelon"
## [37] "Saint Helena"
## [38] "Turks and Caicos Islands"
## [39] "Western Sahara"
## [40] "Svalbard"
## [41] "Saint Martin"
## [42] "Saint Barthelemy"
## [43] "Jersey"
## [44] "South Georgia South Sandwich Islands"
#Function to extract mean NO2 values by country shapefile.
extraction.fxn <- function(x) {
r.vals <- raster::extract(world.NO2.r, wrld_simpl[wrld_simpl$NAME==as.character(x), ])
r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
final.df <- data.frame(wrld_simpl@data[wrld_simpl$NAME==as.character(x), ], r.mean)
rm(r.mean, r.vals)
names(final.df)[5] <- "location"
names(final.df)[6] <- "Area"
names(final.df)[10] <- "Longitude"
names(final.df)[11] <- "Latitude"
names(final.df)[12] <- "NO2_Concentration"
final.df <- final.df %>% dplyr::select(location, Area, Longitude, Latitude, NO2_Concentration)
}
#Extracting mean NO2 values and adding to coordinates data to make final data frame.
country.shp.list <- wrld_simpl@data$NAME
x <- country.shp.list
total.df <- purrr::map_dfr(x, extraction.fxn)
#Read in IHME regions data.
IHME.regions.df <- read.csv(file = 'IHME_regions.csv', header = TRUE)
IHME.regions.df <- IHME.regions.df %>% rename(location = Countries) %>% dplyr::select("Super.region", "Regions", "location")
#Merged geospatial data set with COVID data (variables come from COVID exploratory analysis RMD file).
covid.NO2.df <- right_join(IHME.regions.df, total.df, by = "location")
covid.NO2.df <- full_join(covid.NO2.df, owid.countries, by = "location")
covid.NO2.df$Regions <- as.factor(covid.NO2.df$Regions)
covid.NO2.df$Super.region <- as.factor(covid.NO2.df$Super.region)#isolate country using extract and crop for both pop and NO2
#calculate product of population and pollution for each cell using overlay function
#sum all of these products
#divide this sum by tot pop for country from df
pop.den.r <- raster("gpw_v4_population_density/pop.density.tif") #Read in GPW v4 population raster data.
pop.weighting.fxn <- function(x) {
pop.cropped <- crop(pop.den.r, extent(wrld_simpl[wrld_simpl$NAME==as.character(x), ]))
pop.cropped <- mask(pop.cropped, wrld_simpl[wrld_simpl$NAME==as.character(x), ])
NO2.cropped <- crop(world.NO2.r, extent(wrld_simpl[wrld_simpl$NAME==as.character(x), ]))
NO2.cropped <- mask(NO2.cropped, wrld_simpl[wrld_simpl$NAME==as.character(x), ])
overlay.cropped <- overlay(pop.cropped, NO2.cropped, fun=function(x,y){(x*y)} )
sum <- cellStats(overlay.cropped, stat='sum')
mod.covid.df <- filter(covid.NO2.df, location == as.character(x))
weighted.NO2 <- sum / mod.covid.df$population %>% data.frame()
df <- wrld_simpl@data[wrld_simpl$NAME==as.character(x), ]
weighted.NO2.df <- merge(df, weighted.NO2)
names(weighted.NO2.df)[5] <- "location"
names(weighted.NO2.df)[12] <- "Population weighted NO2"
weighted.NO2.df <- weighted.NO2.df %>% dplyr::select(location, `Population weighted NO2`)
} #Function to weight NO2 by population for each country.
x <- country.shp.list[country.shp.list!="Antarctica"]
weighted.NO2.df <- purrr::map_dfr(x, pop.weighting.fxn)
weighted.covid.NO2.df <- full_join(covid.NO2.df, weighted.NO2.df, by = "location") #Join all data frames. wrld.df <- as.data.frame(wrld_simpl)
wrld.fort <- fortify(wrld_simpl, region = "NAME")
names(wrld.fort)[6] <- "location" #Getting world shapefile into data frame for ggplot.
covid.NO2.plot.df <- full_join(wrld.fort, weighted.covid.NO2.df, by ="location")
covid.NO2.plot.df <- arrange(covid.NO2.plot.df, order) #Joined full weighted COVID data set with shapefile data set.
map <- ggplot(data = covid.NO2.plot.df, aes(x = long, y = lat, group = group))
map +
geom_polygon(aes(fill = NO2_Concentration), color = 'black', size = 0.1) +
my_theme() +
ggtitle("World Raw NO2 Concentrations 07-10-2018 to 07-10-2019") +
scale_fill_gradientn(name = "NO2 Concentration \n(mol/m^2)", colours = myPalette(100)) +
north(x.min = -180, y.min = -90, x.max = 180, y.max = 90, anchor = c(x=-140, y=-35), symbol=12) +
scalebar(covid.NO2.plot.df, dist = 1000, dist_unit = 'km', transform = TRUE, anchor = c(x=-140, y=-60), st.size = 4, model = 'WGS84') +
coord_equal() #Plot of raw NO2 concentrations.
map +
geom_polygon(aes(fill = `Population weighted NO2`), color = 'black', size = 0.1) +
my_theme() +
ggtitle("World Weighted NO2 Concentrations 07-10-2018 to 07-10-2019") +
scale_fill_gradientn(name = "Weighted NO2 \nConcentration", colours = myPalette(100)) +
north(x.min = -180, y.min = -90, x.max = 180, y.max = 90, anchor = c(x=-140, y=-35), symbol=12) +
scalebar(covid.NO2.plot.df, dist = 1000, dist_unit = 'km', transform = TRUE, anchor = c(x=-140, y=-60), st.size = 4, model = 'WGS84') +
coord_equal() #Plot of population weighted NO2. #Statistics on world NO2 concentrations.
covid.NO2.df %>%
summarize(variable = "NO2_Concentration", mean_NO2 = mean(NO2_Concentration, na.rm = T), st_dev_cty = sd(NO2_Concentration, na.rm = T)) %>%
pander| variable | mean_NO2 | st_dev_cty |
|---|---|---|
| NO2_Concentration | 1.692e-05 | 1.103e-05 |
covid.NO2.df %>%
summarize(variable = "NO2_Concentration",
q0.2 = quantile(NO2_Concentration, 0.2, na.rm = T),
q0.4 = quantile(NO2_Concentration, 0.4, na.rm = T),
q0.6 = quantile(NO2_Concentration, 0.6, na.rm = T),
q0.8 = quantile(NO2_Concentration, 0.8, na.rm = T)) %>%
pander| variable | q0.2 | q0.4 | q0.6 | q0.8 |
|---|---|---|---|---|
| NO2_Concentration | 8.389e-06 | 1.204e-05 | 1.728e-05 | 2.352e-05 |
#Histogram of NO2 concentrations.
covid.NO2.df %>%
ggplot(aes(NO2_Concentration)) +
geom_histogram(color = "black",fill = "grey") +
geom_vline(xintercept = mean(covid.NO2.df$NO2_Concentration), lwd = 2) +
labs(title = "Distribution of Tropospheric NO2",
x = "NO2 Concentration (mol/m^2)",
y = "Number of Countries") +
theme_minimal() +
scale_x_continuous(breaks = seq(0,8E-5,0.5E-5)) +
theme(axis.text.x = element_text(angle = 45,hjust=1))#Statistics on world COVID case statistics.
covid.NO2.df %>%
summarize(variable = "totcases.mil", mean_cases = mean(totcases.mil, na.rm = T), st_dev_cty = sd(totcases, na.rm = T)) %>%
pander| variable | mean_cases | st_dev_cty |
|---|---|---|
| totcases.mil | 35928 | 3611805 |
covid.NO2.df %>%
summarize(variable = "totcases.mil",
q0.2 = quantile(totcases.mil, 0.2, na.rm = TRUE),
q0.4 = quantile(totcases.mil, 0.4, na.rm = TRUE),
q0.6 = quantile(totcases.mil, 0.6, na.rm = TRUE),
q0.8 = quantile(totcases.mil, 0.8, na.rm = TRUE)) %>%
pander| variable | q0.2 | q0.4 | q0.6 | q0.8 |
|---|---|---|---|---|
| totcases.mil | 1662 | 8186 | 33123 | 73388 |
#Histogram of COVID case statistics.
max(covid.NO2.df$totcases.mil, na.rm = TRUE)## [1] 179667.4
covid.NO2.df %>%
ggplot(aes(totcases.mil)) +
geom_histogram(color = "black",fill = "grey") +
geom_vline(xintercept = mean(covid.NO2.df$totcases.mil), lwd = 2) +
labs(title = "Distribution of World COVID-19 Cases per Million",
x = "Total COVID-19 Cases per Million",
y = "Number of Countries") +
theme_minimal() +
scale_x_continuous(breaks = seq(0, 179667.4,10000)) +
theme(axis.text.x = element_text(angle = 45,hjust=1))#Statistics on world COVID death statistics.
covid.NO2.df %>%
summarize(variable = "totdeaths.mil", mean_cases = mean(totcases.mil, na.rm = T), st_dev_cty = sd(totcases, na.rm = T)) %>%
pander| variable | mean_cases | st_dev_cty |
|---|---|---|
| totdeaths.mil | 35928 | 3611805 |
covid.NO2.df %>%
summarize(variable = "totdeaths.mil",
q0.2 = quantile(totdeaths.mil, 0.2, na.rm = TRUE),
q0.4 = quantile(totdeaths.mil, 0.4, na.rm = TRUE),
q0.6 = quantile(totdeaths.mil, 0.6, na.rm = TRUE),
q0.8 = quantile(totdeaths.mil, 0.8, na.rm = TRUE)) %>%
pander| variable | q0.2 | q0.4 | q0.6 | q0.8 |
|---|---|---|---|---|
| totdeaths.mil | 27.78 | 132.2 | 536.1 | 1310 |
#Histogram of COVID statistics.
max(covid.NO2.df$totdeaths.mil, na.rm = TRUE)## [1] 5820.087
covid.NO2.df %>%
ggplot(aes(totdeaths.mil)) +
geom_histogram(color = "black",fill = "grey") +
geom_vline(xintercept = mean(covid.NO2.df$totdeaths.mil, na.rm = TRUE), lwd = 2) +
labs(title = "Distribution of World COVID-19 Deaths per Million",
x = "Total COVID-19 Deaths per Million",
y = "Number of Countries") +
theme_minimal() +
scale_x_continuous(breaks = seq(0,5820.087,500)) +
theme(axis.text.x = element_text(angle = 45,hjust=1))#Summary statistics, NO2, grouping by Super.region
covid.NO2.df %>%
group_by(Super.region) %>%
summarize(mean_NO2 = mean(NO2_Concentration, na.rm = TRUE), sd_NO2 = sd(NO2_Concentration, na.rm = TRUE)) %>%
pander| Super.region | mean_NO2 | sd_NO2 |
|---|---|---|
| Central Europe, Eastern Europe, and Central Asia | 2.195e-05 | 7.39e-06 |
| High-income | 2.472e-05 | 1.7e-05 |
| Latin America and Caribbean | 1.132e-05 | 3.362e-06 |
| North Africa and the Middle East | 2.084e-05 | 9.85e-06 |
| South Asia | 2.998e-05 | 5.271e-06 |
| Southeast Asia, East Asia, and Oceania | 1.347e-05 | 1.135e-05 |
| Sub-Saharan Africa | 1.7e-05 | 6.46e-06 |
| NA | 1.149e-05 | 8.931e-06 |
#Summary statistics, COVID cases, grouping by Super.region
covid.NO2.df %>%
group_by(Super.region) %>%
summarize(mean_cases.mil = mean(totcases.mil, na.rm = TRUE), sd_cases.mil = sd(totcases.mil, na.rm = TRUE)) %>%
pander| Super.region | mean_cases.mil | sd_cases.mil |
|---|---|---|
| Central Europe, Eastern Europe, and Central Asia | 68485 | 39342 |
| High-income | 63327 | 42290 |
| Latin America and Caribbean | 30927 | 26009 |
| North Africa and the Middle East | 42067 | 39554 |
| South Asia | 11243 | 9764 |
| Southeast Asia, East Asia, and Oceania | 16298 | 41810 |
| Sub-Saharan Africa | 6235 | 11443 |
| NA | 41135 | 48023 |
#Summary statistics, COVID deaths, grouping by Super.region
covid.NO2.df %>%
group_by(Super.region) %>%
summarize(mean_deaths.mil = mean(totdeaths.mil, na.rm = TRUE), sd_deaths.mil = sd(totdeaths.mil, na.rm = TRUE)) %>%
pander| Super.region | mean_deaths.mil | sd_deaths.mil |
|---|---|---|
| Central Europe, Eastern Europe, and Central Asia | 1450 | 956.1 |
| High-income | 1014 | 728 |
| Latin America and Caribbean | 935.3 | 1165 |
| North Africa and the Middle East | 466.9 | 379.1 |
| South Asia | 157.5 | 135.2 |
| Southeast Asia, East Asia, and Oceania | 112.8 | 178.8 |
| Sub-Saharan Africa | 105.6 | 192.2 |
| NA | 765.5 | 906.5 |
#Histogram, NO2, grouping by Super.region
covid.NO2.df %>%
ggplot(aes(NO2_Concentration)) +
geom_histogram(color = "black",fill = "grey") +
labs(title = "Distribution of NO2 Concentrations Relative to Super Region",
x = "NO2 Concentration (mol/m^2)",
y = "Number of Countries") +
theme_minimal() +
scale_x_continuous(breaks = seq(0,8E-5,0.5E-5)) +
scale_y_continuous(breaks = seq(0,12,2)) +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
facet_grid(Super.region~.)#Histogram, COVID cases, grouping by Super.region
covid.NO2.df %>%
ggplot(aes(totcases.mil)) +
geom_histogram(color = "black",fill = "grey") +
labs(title = "Distribution of COVID Cases Relative to Super Region",
x = "COVID-19 Cases per Million",
y = "Number of Countries") +
theme_minimal() +
scale_x_continuous(breaks = seq(0, 179667.4,10000)) +
scale_y_continuous(breaks = seq(0,32,8)) +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
facet_grid(Super.region~.)#Histogram, COVID deaths, grouping by Super.region
covid.NO2.df %>%
ggplot(aes(totdeaths.mil)) +
geom_histogram(color = "black",fill = "grey") +
labs(title = "Distribution of COVID Deaths Relative to Super Region",
x = "COVID-19 Deaths per Million",
y = "Number of Countries") +
theme_minimal() +
scale_x_continuous(breaks = seq(0,5820.087,500)) +
scale_y_continuous(breaks = seq(0,32,8)) +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
facet_grid(Super.region~.)#Boxplot, NO2, grouping by Super.region
covid.NO2.df %>%
ggplot(aes(Super.region,NO2_Concentration)) +
geom_boxplot() +
geom_jitter(color="red", size=0.4, alpha=0.9) +
labs(title = "Distribution of NO2 relative to Super Region",
x = "Super Region",
y = "NO2 Concentration (mol/m^2)") +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
scale_y_continuous(breaks = seq(0,8E-5,0.5E-5)) #Boxplot, COVID cases, grouping by Super.region
covid.NO2.df %>%
ggplot(aes(Super.region,totcases.mil)) +
geom_boxplot() +
geom_jitter(color="red", size=0.4, alpha=0.9) +
labs(title = "Distribution of COVID Cases Relative to Super Region",
x = "Super Region",
y = "COVID-19 Cases per Million") +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
scale_y_continuous(breaks = seq(0, 179667.4,10000))#Boxplot, COVID deaths, grouping by Super.region
covid.NO2.df %>%
ggplot(aes(Super.region,totdeaths.mil)) +
geom_boxplot() +
geom_jitter(color="red", size=0.4, alpha=0.9) +
labs(title = "Distribution of COVID Deaths Relative to Super Region",
x = "Super Region",
y = "COVID-19 Deaths per Million") +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
scale_y_continuous(breaks = seq(0,5820.087,500))covid.NO2.df %>%
select(NO2_Concentration, totcases.mil, totdeaths.mil, pop.density, med.age, card.death, diab.prev, fem.smokers, male.smokers) %>%
cor(use="pairwise.complete.obs") %>%
pander| Â | NO2_Concentration | totcases.mil | totdeaths.mil |
|---|---|---|---|
| NO2_Concentration | 1 | 0.3165 | 0.2235 |
| totcases.mil | 0.3165 | 1 | 0.7135 |
| totdeaths.mil | 0.2235 | 0.7135 | 1 |
| pop.density | 0.204 | 0.0449 | -0.03193 |
| med.age | 0.3898 | 0.5935 | 0.5198 |
| card.death | -0.234 | -0.2975 | -0.2144 |
| diab.prev | -0.2225 | 0.02145 | -0.01058 |
| fem.smokers | 0.2739 | 0.5659 | 0.5681 |
| male.smokers | 0.04505 | 0.09914 | 0.07643 |
| Â | pop.density | med.age | card.death | diab.prev |
|---|---|---|---|---|
| NO2_Concentration | 0.204 | 0.3898 | -0.234 | -0.2225 |
| totcases.mil | 0.0449 | 0.5935 | -0.2975 | 0.02145 |
| totdeaths.mil | -0.03193 | 0.5198 | -0.2144 | -0.01058 |
| pop.density | 1 | 0.1484 | -0.1755 | 0.01393 |
| med.age | 0.1484 | 1 | -0.3436 | 0.1462 |
| card.death | -0.1755 | -0.3436 | 1 | 0.1519 |
| diab.prev | 0.01393 | 0.1462 | 0.1519 | 1 |
| fem.smokers | -0.04709 | 0.6391 | -0.1427 | 0.09079 |
| male.smokers | 0.0001981 | 0.1816 | 0.4285 | 0.2061 |
| Â | fem.smokers | male.smokers |
|---|---|---|
| NO2_Concentration | 0.2739 | 0.04505 |
| totcases.mil | 0.5659 | 0.09914 |
| totdeaths.mil | 0.5681 | 0.07643 |
| pop.density | -0.04709 | 0.0001981 |
| med.age | 0.6391 | 0.1816 |
| card.death | -0.1427 | 0.4285 |
| diab.prev | 0.09079 | 0.2061 |
| fem.smokers | 1 | 0.2261 |
| male.smokers | 0.2261 | 1 |
#Visual representation of correlations.
covid.NO2.df %>%
select(NO2_Concentration, totcases.mil, totdeaths.mil, pop.density, med.age, card.death, diab.prev, fem.smokers, male.smokers) %>%
cor(use="pairwise.complete.obs") %>%
ggcorrplot(type = "lower", ggtheme = theme_minimal, colors = c("#6D9EC1","white","#E46726"),
show.diag = T,
lab = T, lab_size = 2, #shows correlation values and sets size of text
title = "Correlation Matrix for the covid.NO2 dataset",
legend.title = "Correlation Value",
outline.color = "white",
hc.order = T) #orders by variables most related#NO2 concentration vs COVID cases.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = totcases.mil)) +
geom_point(color = "#333aff", alpha = 1, size = 1) +
geom_smooth(color = "red") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = "World COVID-19 Cases per Million People vs. NO2 Concentration") cor(covid.NO2.df$totcases.mil, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.3165363
NO2.lm.cases <- lm(totcases.mil ~ NO2_Concentration, data = covid.NO2.df)
summary.lm(NO2.lm.cases)##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -90810 -28883 -12346 22261 137741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.364e+04 5.741e+03 2.376 0.0185 *
## NO2_Concentration 1.211e+09 2.660e+08 4.551 9.62e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 39170 on 186 degrees of freedom
## (63 observations deleted due to missingness)
## Multiple R-squared: 0.1002, Adjusted R-squared: 0.09536
## F-statistic: 20.71 on 1 and 186 DF, p-value: 9.621e-06
confint(NO2.lm.cases) #Summary statistics for linear fit. ## 2.5 % 97.5 %
## (Intercept) 2.313445e+03 2.496619e+04
## NO2_Concentration 6.858510e+08 1.735461e+09
#NO2 concentration vs COVID deaths.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = totdeaths.mil)) +
geom_point(color = "#333aff", alpha = 1, size = 1) +
geom_smooth(color = "red") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = "World COVID-19 Deaths per Million People vs. NO2 Concentration") cor(covid.NO2.df$totdeaths.mil, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.2234735
NO2.lm.deaths <- lm(totdeaths.mil ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.deaths)##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1484.4 -571.6 -372.9 387.4 5333.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.332e+02 1.284e+02 2.596 0.01021 *
## NO2_Concentration 1.797e+07 5.858e+06 3.067 0.00249 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 841 on 179 degrees of freedom
## (70 observations deleted due to missingness)
## Multiple R-squared: 0.04994, Adjusted R-squared: 0.04463
## F-statistic: 9.409 on 1 and 179 DF, p-value: 0.002494
confint(NO2.lm.deaths) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) 7.992357e+01 5.864729e+02
## NO2_Concentration 6.409060e+06 2.952679e+07
regions.list <- c("High-income", "Latin America and Caribbean", "Sub-Saharan Africa", "North Africa and the Middle East", "South Asia", "Southeast Asia, East Asia, and Oceania", "Central Europe, Eastern Europe, and Central Asia")
sub.regions.list <- c("High-income Asia Pacific", "High-income Australasia", "High-income North America", "High-income Southern Latin America", "High-income Western Europe", "Caribbean", "Central Latin America", "Tropical Latin America", "Andean Latin America", "Central sub-Saharan Africa", "Eastern sub-Saharan Africa", "Southern sub-Saharan Africa", "Western sub-Saharan Africa", "North Africa and the Middle East", "South Asia", "East Asia", "Oceania", "Southeast Asia", "Central Asia", "Central Europe", "Eastern Europe")
#Function for bivariate analysis by region.
cases.NO2 <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(NO2_Concentration)) %>%
ggplot(aes(x = NO2_Concentration, y = totcases.mil)) +
geom_point(color = "#333aff", size = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = paste0("COVID-19 Cases per Million People in ", (Super.region = x), " vs. NO2 Concentration"))
}
NO2.lm.cases <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(NO2_Concentration)) %>%
lm(totcases.mil ~ NO2_Concentration,.) %>%
summary.lm()
} #Function for linear regression statistics to be included in for loop below.
for (i in regions.list) {
print(cases.NO2(i))
print(NO2.lm.cases(i))
} #Loops function for continents of interest. ##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -78182 -33098 9082 23797 114373
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52150 13590 3.838 0.000573 ***
## NO2_Concentration 438853382 448383204 0.979 0.335282
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 42320 on 31 degrees of freedom
## Multiple R-squared: 0.02998, Adjusted R-squared: -0.001316
## F-statistic: 0.9579 on 1 and 31 DF, p-value: 0.3353
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32144 -16754 -7505 2170 62157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43205 17159 2.518 0.0183 *
## NO2_Concentration -987233958 1439547473 -0.686 0.4989
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26160 on 26 degrees of freedom
## Multiple R-squared: 0.01777, Adjusted R-squared: -0.02001
## F-statistic: 0.4703 on 1 and 26 DF, p-value: 0.4989
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9740 -5594 -3283 -1344 48613
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13633 6738 2.023 0.0496 *
## NO2_Concentration -454287265 399732943 -1.136 0.2624
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11400 on 41 degrees of freedom
## Multiple R-squared: 0.03054, Adjusted R-squared: 0.006895
## F-statistic: 1.292 on 1 and 41 DF, p-value: 0.2624
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43562 -23508 -12602 19146 94637
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4333 17595 -0.246 0.8081
## NO2_Concentration 2226431334 766638920 2.904 0.0091 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33770 on 19 degrees of freedom
## Multiple R-squared: 0.3074, Adjusted R-squared: 0.271
## F-statistic: 8.434 on 1 and 19 DF, p-value: 0.009095
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## 1 2 3 4 5
## -9195 -7454 9986 10206 -3543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2922 31378 -0.093 0.932
## NO2_Concentration 472550653 1034079847 0.457 0.679
##
## Residual standard error: 10900 on 3 degrees of freedom
## Multiple R-squared: 0.06508, Adjusted R-squared: -0.2466
## F-statistic: 0.2088 on 1 and 3 DF, p-value: 0.6787
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36541 -25947 -10363 168 121820
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.611e+04 1.930e+04 2.389 0.0296 *
## NO2_Concentration -1.636e+09 1.047e+09 -1.563 0.1376
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 44650 on 16 degrees of freedom
## Multiple R-squared: 0.1324, Adjusted R-squared: 0.07822
## F-statistic: 2.443 on 1 and 16 DF, p-value: 0.1376
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56913 -26986 -7456 21585 92022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.361e+03 2.029e+04 0.363 0.71967
## NO2_Concentration 2.772e+09 8.726e+08 3.177 0.00382 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34030 on 26 degrees of freedom
## Multiple R-squared: 0.2796, Adjusted R-squared: 0.2519
## F-statistic: 10.09 on 1 and 26 DF, p-value: 0.003815
deaths.NO2 <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(NO2_Concentration)) %>%
ggplot(aes(x = NO2_Concentration, y = totdeaths.mil)) +
geom_point(color = "#333aff", size = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. NO2 Concentration"))
}
NO2.lm.deaths <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(NO2_Concentration)) %>%
lm(totdeaths.mil ~ NO2_Concentration,.) %>%
summary.lm() #Function for linear regression statistics to be included in for loop below.
}
for (i in regions.list) {
print(deaths.NO2(i))
print(NO2.lm.deaths(i))
} #Loops function for continents of interest.##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1227.49 -777.20 60.22 659.16 1133.85
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 856.1 235.1 3.641 0.000979 ***
## NO2_Concentration 6198452.0 7757270.8 0.799 0.430341
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 732.1 on 31 degrees of freedom
## Multiple R-squared: 0.02018, Adjusted R-squared: -0.01143
## F-statistic: 0.6385 on 1 and 31 DF, p-value: 0.4303
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -958.7 -603.5 -376.9 335.6 4779.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.352e+03 7.743e+02 1.746 0.0925 .
## NO2_Concentration -3.652e+07 6.496e+07 -0.562 0.5788
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1180 on 26 degrees of freedom
## Multiple R-squared: 0.01201, Adjusted R-squared: -0.02599
## F-statistic: 0.3161 on 1 and 26 DF, p-value: 0.5788
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -105.40 -86.65 -69.60 -11.17 915.43
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 126.8 114.9 1.104 0.276
## NO2_Concentration -1301481.1 6816614.0 -0.191 0.850
##
## Residual standard error: 194.5 on 41 degrees of freedom
## Multiple R-squared: 0.0008883, Adjusted R-squared: -0.02348
## F-statistic: 0.03645 on 1 and 41 DF, p-value: 0.8495
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -460.2 -226.6 -129.6 173.6 861.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.023e+02 1.805e+02 0.567 0.5775
## NO2_Concentration 1.749e+07 7.864e+06 2.225 0.0384 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 346.4 on 19 degrees of freedom
## Multiple R-squared: 0.2066, Adjusted R-squared: 0.1649
## F-statistic: 4.949 on 1 and 19 DF, p-value: 0.03843
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## 1 2 3 4 5
## -108.53 -143.81 122.43 149.24 -19.33
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.313e-01 4.400e+02 0.001 1.000
## NO2_Concentration 5.247e+06 1.450e+07 0.362 0.741
##
## Residual standard error: 152.9 on 3 degrees of freedom
## Multiple R-squared: 0.04182, Adjusted R-squared: -0.2776
## F-statistic: 0.1309 on 1 and 3 DF, p-value: 0.7414
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -172.33 -125.39 -27.02 49.87 515.71
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.137e+02 7.379e+01 2.897 0.0105 *
## NO2_Concentration -6.528e+06 4.001e+06 -1.632 0.1222
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 170.7 on 16 degrees of freedom
## Multiple R-squared: 0.1427, Adjusted R-squared: 0.0891
## F-statistic: 2.663 on 1 and 16 DF, p-value: 0.1222
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1217.34 -503.24 13.44 411.01 1779.79
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -440.9 429.5 -1.026 0.314
## NO2_Concentration 85772376.4 18474796.8 4.643 8.63e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 720.4 on 26 degrees of freedom
## Multiple R-squared: 0.4533, Adjusted R-squared: 0.4322
## F-statistic: 21.55 on 1 and 26 DF, p-value: 8.633e-05
#NO2 concentration vs COVID cases.
ggplot(data = weighted.covid.NO2.df, aes(x = `Population weighted NO2`, y = totcases.mil)) +
geom_point(color = "#333aff", alpha = 1, size = 1) +
geom_smooth(color = "red") +
theme_bw() +
labs(x = "Weighted NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = "World COVID-19 Cases per Million People vs. Weighted NO2 Concentration") cor(weighted.covid.NO2.df$totcases.mil, weighted.covid.NO2.df$`Population weighted NO2`, use = "complete.obs")## [1] 0.1993439
NO2.lm.cases <- lm(totcases.mil ~ `Population weighted NO2`, data = weighted.covid.NO2.df)
summary.lm(NO2.lm.cases)##
## Call:
## lm(formula = totcases.mil ~ `Population weighted NO2`, data = weighted.covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57105 -30596 -15063 26642 151847
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.782e+04 4.250e+03 6.547 5.57e-10 ***
## `Population weighted NO2` 4.406e+12 1.588e+12 2.774 0.0061 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40460 on 186 degrees of freedom
## (63 observations deleted due to missingness)
## Multiple R-squared: 0.03974, Adjusted R-squared: 0.03458
## F-statistic: 7.697 on 1 and 186 DF, p-value: 0.006095
confint(NO2.lm.cases) #Summary statistics for linear fit. ## 2.5 % 97.5 %
## (Intercept) 1.943685e+04 3.620427e+04
## `Population weighted NO2` 1.273021e+12 7.539258e+12
#NO2 concentration vs COVID deaths.
ggplot(data = weighted.covid.NO2.df, aes(x = `Population weighted NO2`, y = totdeaths.mil)) +
geom_point(color = "#333aff", alpha = 1, size = 1) +
geom_smooth(color = "red") +
theme_bw() +
labs(x = "Weighted NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = "World COVID-19 Deaths per Million People vs. Weighted NO2 Concentration") cor(weighted.covid.NO2.df$totdeaths.mil, weighted.covid.NO2.df$`Population weighted NO2`, use = "complete.obs")## [1] 0.1763396
NO2.lm.deaths <- lm(totdeaths.mil ~ `Population weighted NO2`, data = weighted.covid.NO2.df,)
summary.lm(NO2.lm.deaths)##
## Call:
## lm(formula = totdeaths.mil ~ `Population weighted NO2`, data = weighted.covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1272.7 -577.4 -403.9 348.4 5236.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.139e+02 9.285e+01 5.534 1.1e-07 ***
## `Population weighted NO2` 8.161e+10 3.405e+10 2.397 0.0176 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 849.3 on 179 degrees of freedom
## (70 observations deleted due to missingness)
## Multiple R-squared: 0.0311, Adjusted R-squared: 0.02568
## F-statistic: 5.745 on 1 and 179 DF, p-value: 0.01757
confint(NO2.lm.deaths) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) 3.306167e+02 6.970757e+02
## `Population weighted NO2` 1.442025e+10 1.487977e+11
regions.list <- c("High-income", "Latin America and Caribbean", "Sub-Saharan Africa", "North Africa and the Middle East", "South Asia", "Southeast Asia, East Asia, and Oceania", "Central Europe, Eastern Europe, and Central Asia")
sub.regions.list <- c("High-income Asia Pacific", "High-income Australasia", "High-income North America", "High-income Southern Latin America", "High-income Western Europe", "Caribbean", "Central Latin America", "Tropical Latin America", "Andean Latin America", "Central sub-Saharan Africa", "Eastern sub-Saharan Africa", "Southern sub-Saharan Africa", "Western sub-Saharan Africa", "North Africa and the Middle East", "South Asia", "East Asia", "Oceania", "Southeast Asia", "Central Asia", "Central Europe", "Eastern Europe")
#Function for bivariate analysis by region.
cases.NO2.w <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(`Population weighted NO2`)) %>%
ggplot(aes(x = `Population weighted NO2`, y = totcases.mil)) +
geom_point(color = "#333aff", size = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "Weighted NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = paste0("COVID-19 Cases per Million People in ", (Super.region = x), " vs. Weighted NO2 Concentration"))
}
NO2.lm.cases.w <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(`Population weighted NO2`)) %>%
lm(totcases.mil ~ `Population weighted NO2`,.) %>%
summary.lm()
} #Function for linear regression statistics to be included in for loop below.
for (i in regions.list) {
print(cases.NO2.w(i))
print(NO2.lm.cases.w(i))
} #Loops function for continents of interest. ##
## Call:
## lm(formula = totcases.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64931 -40260 10414 30182 112615
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.705e+04 1.263e+04 5.309 8.85e-06 ***
## `Population weighted NO2` -1.385e+12 3.788e+12 -0.366 0.717
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 42870 on 31 degrees of freedom
## Multiple R-squared: 0.004294, Adjusted R-squared: -0.02783
## F-statistic: 0.1337 on 1 and 31 DF, p-value: 0.7171
##
## Call:
## lm(formula = totcases.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40870 -15279 -7329 5880 57138
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.771e+04 7.714e+03 3.592 0.00134 **
## `Population weighted NO2` 4.663e+12 6.534e+12 0.714 0.48181
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26140 on 26 degrees of freedom
## Multiple R-squared: 0.01921, Adjusted R-squared: -0.01851
## F-statistic: 0.5093 on 1 and 26 DF, p-value: 0.4818
##
## Call:
## lm(formula = totcases.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6736 -4698 -4014 -1864 50141
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.132e+03 3.848e+03 2.114 0.0407 *
## `Population weighted NO2` -1.341e+12 2.418e+12 -0.554 0.5823
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11540 on 41 degrees of freedom
## Multiple R-squared: 0.007442, Adjusted R-squared: -0.01677
## F-statistic: 0.3074 on 1 and 41 DF, p-value: 0.5823
##
## Call:
## lm(formula = totcases.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41663 -36786 -6911 24056 117570
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.845e+04 1.178e+04 3.263 0.00409 **
## `Population weighted NO2` 1.228e+12 2.656e+12 0.462 0.64916
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40360 on 19 degrees of freedom
## Multiple R-squared: 0.01112, Adjusted R-squared: -0.04093
## F-statistic: 0.2137 on 1 and 19 DF, p-value: 0.6492
##
## Call:
## lm(formula = totcases.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## 1 2 3 4 5
## -3081.2 -1309.4 11898.8 292.2 -7800.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.558e+03 1.220e+04 -0.537 0.628
## `Population weighted NO2` 5.292e+12 3.450e+12 1.534 0.223
##
## Residual standard error: 8440 on 3 degrees of freedom
## Multiple R-squared: 0.4396, Adjusted R-squared: 0.2528
## F-statistic: 2.353 on 1 and 3 DF, p-value: 0.2226
##
## Call:
## lm(formula = totcases.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33382 -23708 -14100 -5892 123597
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.482e+04 1.461e+04 2.383 0.0299 *
## `Population weighted NO2` -9.426e+12 6.723e+12 -1.402 0.1800
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 45240 on 16 degrees of freedom
## Multiple R-squared: 0.1094, Adjusted R-squared: 0.05374
## F-statistic: 1.965 on 1 and 16 DF, p-value: 0.18
##
## Call:
## lm(formula = totcases.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56366 -25035 -83 21727 77966
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.417e+04 1.960e+04 1.233 0.2285
## `Population weighted NO2` 1.522e+13 6.308e+12 2.413 0.0232 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 36240 on 26 degrees of freedom
## Multiple R-squared: 0.183, Adjusted R-squared: 0.1516
## F-statistic: 5.823 on 1 and 26 DF, p-value: 0.02317
deaths.NO2.w <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(`Population weighted NO2`)) %>%
ggplot(aes(x = `Population weighted NO2`, y = totdeaths.mil)) +
geom_point(color = "#333aff", size = 1) +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "Weighted NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. Weighted NO2 Concentration"))
}
NO2.lm.deaths.w <- function(x) {
weighted.covid.NO2.df %>%
filter(Super.region == x) %>%
filter(!is.na(totdeaths.mil)) %>%
filter(!is.na(`Population weighted NO2`)) %>%
lm(totdeaths.mil ~ `Population weighted NO2`,.) %>%
summary.lm() #Function for linear regression statistics to be included in for loop below.
}
for (i in regions.list) {
print(deaths.NO2.w(i))
print(NO2.lm.deaths.w(i))
} #Loops function for continents of interest.##
## Call:
## lm(formula = totdeaths.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1053.80 -840.99 32.75 668.08 1157.03
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.185e+02 2.168e+02 4.236 0.000189 ***
## `Population weighted NO2` 3.551e+10 6.503e+10 0.546 0.588899
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 736.1 on 31 degrees of freedom
## Multiple R-squared: 0.009529, Adjusted R-squared: -0.02242
## F-statistic: 0.2982 on 1 and 31 DF, p-value: 0.5889
##
## Call:
## lm(formula = totdeaths.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1334.0 -611.0 -308.5 285.3 4895.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.607e+02 3.476e+02 2.188 0.0378 *
## `Population weighted NO2` 1.926e+11 2.944e+11 0.654 0.5188
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1178 on 26 degrees of freedom
## Multiple R-squared: 0.01619, Adjusted R-squared: -0.02165
## F-statistic: 0.4278 on 1 and 26 DF, p-value: 0.5188
##
## Call:
## lm(formula = totdeaths.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -149.73 -103.62 -58.94 -6.57 808.58
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.223e+01 6.391e+01 0.661 0.512
## `Population weighted NO2` 4.480e+10 4.017e+10 1.115 0.271
##
## Residual standard error: 191.7 on 41 degrees of freedom
## Multiple R-squared: 0.02945, Adjusted R-squared: 0.005776
## F-statistic: 1.244 on 1 and 41 DF, p-value: 0.2712
##
## Call:
## lm(formula = totdeaths.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -425.56 -322.47 -72.51 285.37 788.37
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.127e+02 1.120e+02 3.684 0.00158 **
## `Population weighted NO2` 1.839e+10 2.525e+10 0.728 0.47528
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 383.6 on 19 degrees of freedom
## Multiple R-squared: 0.02716, Adjusted R-squared: -0.02404
## F-statistic: 0.5305 on 1 and 19 DF, p-value: 0.4753
##
## Call:
## lm(formula = totdeaths.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## 1 2 3 4 5
## -27.14 -39.47 149.50 -11.88 -71.01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.287e+02 1.442e+02 -0.893 0.438
## `Population weighted NO2` 8.508e+10 4.076e+10 2.087 0.128
##
## Residual standard error: 99.72 on 3 degrees of freedom
## Multiple R-squared: 0.5922, Adjusted R-squared: 0.4563
## F-statistic: 4.357 on 1 and 3 DF, p-value: 0.1281
##
## Call:
## lm(formula = totdeaths.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -152.68 -121.56 -54.04 75.22 526.69
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.648e+02 5.641e+01 2.921 0.010 **
## `Population weighted NO2` -3.499e+10 2.596e+10 -1.348 0.196
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 174.7 on 16 degrees of freedom
## Multiple R-squared: 0.102, Adjusted R-squared: 0.04587
## F-statistic: 1.817 on 1 and 16 DF, p-value: 0.1964
##
## Call:
## lm(formula = totdeaths.mil ~ `Population weighted NO2`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1291.22 -692.02 -88.65 581.03 1886.11
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.747e+02 4.858e+02 0.977 0.3374
## `Population weighted NO2` 3.351e+11 1.563e+11 2.143 0.0416 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 898.2 on 26 degrees of freedom
## Multiple R-squared: 0.1502, Adjusted R-squared: 0.1175
## F-statistic: 4.594 on 1 and 26 DF, p-value: 0.04162
#NO2 concentration vs population density.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = pop.density)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Population Density") +
labs(title = "World Population Density vs. NO2 Concentration") cor(covid.NO2.df$pop.density, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.2039701
NO2.lm.pop <- lm(pop.density ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.pop)##
## Call:
## lm(formula = pop.density ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2374.4 -517.6 -242.1 -4.6 19313.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -281.3 297.3 -0.946 0.34520
## NO2_Concentration 40672190.0 14015313.9 2.902 0.00414 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2106 on 194 degrees of freedom
## (55 observations deleted due to missingness)
## Multiple R-squared: 0.0416, Adjusted R-squared: 0.03666
## F-statistic: 8.422 on 1 and 194 DF, p-value: 0.004137
confint(NO2.lm.pop) #Summary statistics for linear fit. ## 2.5 % 97.5 %
## (Intercept) -867.6018 3.050088e+02
## NO2_Concentration 13030241.3588 6.831414e+07
#NO2 concentration vs median age.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = med.age)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Median Age") +
labs(title = "World Median Age by Country vs. NO2 Concentration") cor(covid.NO2.df$med.age, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.3897704
NO2.lm.age <- lm(med.age ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.age)##
## Call:
## lm(formula = med.age ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.2819 -7.6082 0.2393 6.8515 14.9838
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.437e+01 1.237e+00 19.70 < 2e-16 ***
## NO2_Concentration 3.281e+05 5.746e+04 5.71 4.53e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.4 on 182 degrees of freedom
## (67 observations deleted due to missingness)
## Multiple R-squared: 0.1519, Adjusted R-squared: 0.1473
## F-statistic: 32.6 on 1 and 182 DF, p-value: 4.528e-08
confint(NO2.lm.age) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) 21.92602 26.80561
## NO2_Concentration 214701.10076 441432.36905
#NO2 concentration vs GDP per capita.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = gdp.cap)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "GDP per Capita") +
labs(title = "World GDP per Capita vs. NO2 Concentration") cor(covid.NO2.df$gdp.cap, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.3524915
NO2.lm.gdp <- lm(gdp.cap ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.gdp)##
## Call:
## lm(formula = gdp.cap ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30294 -13965 -5412 6907 88128
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6948 2851 2.437 0.0158 *
## NO2_Concentration 680456994 133541576 5.095 8.62e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19450 on 183 degrees of freedom
## (66 observations deleted due to missingness)
## Multiple R-squared: 0.1243, Adjusted R-squared: 0.1195
## F-statistic: 25.96 on 1 and 183 DF, p-value: 8.62e-07
confint(NO2.lm.gdp) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) 1.322168e+03 12573.53
## NO2_Concentration 4.169779e+08 943936113.77
#NO2 concentration vs cardiovascular death.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = card.death)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Cardiovascular Death Rates") +
labs(title = "World Cardiovascular Death Rates vs. NO2 Concentration") cor(covid.NO2.df$card.death, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] -0.2339947
NO2.lm.card.death <- lm(card.death ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.card.death)##
## Call:
## lm(formula = card.death ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -204.00 -91.69 -20.65 69.54 462.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.124e+02 1.746e+01 17.894 < 2e-16 ***
## NO2_Concentration -2.656e+06 8.180e+05 -3.247 0.00139 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 119.5 on 182 degrees of freedom
## (67 observations deleted due to missingness)
## Multiple R-squared: 0.05475, Adjusted R-squared: 0.04956
## F-statistic: 10.54 on 1 and 182 DF, p-value: 0.001389
confint(NO2.lm.card.death) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) 277.9489 346.8407
## NO2_Concentration -4270000.3363 -1041995.4937
#NO2 concentration vs diabetes prevalence.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = diab.prev)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Diabetes Prevalence Rates") +
labs(title = "World Diabetes Prevalence Rates vs. NO2 Concentration") cor(covid.NO2.df$diab.prev, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] -0.2225499
NO2.lm.diab <- lm(diab.prev ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.diab)##
## Call:
## lm(formula = diab.prev ~ NO2_Concentration, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9101 -2.9118 -0.6811 2.3519 20.9957
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.008e+01 6.573e-01 15.335 < 2e-16 ***
## NO2_Concentration -9.807e+04 3.117e+04 -3.147 0.00192 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.624 on 190 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.04953, Adjusted R-squared: 0.04453
## F-statistic: 9.901 on 1 and 190 DF, p-value: 0.001918
confint(NO2.lm.diab) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) 8.783856e+00 11.37712
## NO2_Concentration -1.595482e+05 -36591.15253
#NO2 concentration vs COVID cases.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = totcases.mil)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = "World COVID-19 Cases per Million People vs. NO2 Concentration") cor(covid.NO2.df$totcases.mil, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.3165363
NO2.lm.cases <- lm(totcases.mil ~ NO2_Concentration + gdp.cap + pop.density + med.age, data = covid.NO2.df)
summary.lm(NO2.lm.cases)##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -78822 -14001 -4366 9912 118443
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.057e+04 9.193e+03 -4.413 1.80e-05 ***
## NO2_Concentration 2.755e+08 2.418e+08 1.139 0.2563
## gdp.cap 2.713e-01 1.633e-01 1.661 0.0986 .
## pop.density -7.380e+00 3.021e+00 -2.443 0.0156 *
## med.age 2.239e+03 3.517e+02 6.367 1.71e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31160 on 171 degrees of freedom
## (75 observations deleted due to missingness)
## Multiple R-squared: 0.3929, Adjusted R-squared: 0.3787
## F-statistic: 27.67 on 4 and 171 DF, p-value: < 2.2e-16
confint(NO2.lm.cases) #Summary statistics for linear fit. ## 2.5 % 97.5 %
## (Intercept) -5.871916e+04 -2.242532e+04
## NO2_Concentration -2.018993e+08 7.528626e+08
## gdp.cap -5.116587e-02 5.936683e-01
## pop.density -1.334335e+01 -1.416295e+00
## med.age 1.545231e+03 2.933764e+03
#NO2 concentration vs COVID deaths.
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = totdeaths.mil)) +
geom_point(color = "#333aff", alpha = 1) +
geom_smooth(color = "red") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = "World COVID-19 Deaths per Million People vs. NO2 Concentration") cor(covid.NO2.df$totdeaths.mil, covid.NO2.df$NO2_Concentration, use = "complete.obs") ## [1] 0.2234735
NO2.lm.deaths <- lm(totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density + med.age, data = covid.NO2.df,)
summary.lm(NO2.lm.deaths)##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = covid.NO2.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1545.1 -361.5 -88.4 211.0 5187.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.114e+03 2.111e+02 -5.276 4.06e-07 ***
## NO2_Concentration 4.054e+06 5.544e+06 0.731 0.4657
## gdp.cap -8.090e-03 3.700e-03 -2.187 0.0302 *
## pop.density -1.727e-01 6.841e-02 -2.524 0.0125 *
## med.age 6.239e+01 7.971e+00 7.827 5.46e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 705.4 on 167 degrees of freedom
## (79 observations deleted due to missingness)
## Multiple R-squared: 0.3394, Adjusted R-squared: 0.3235
## F-statistic: 21.45 on 4 and 167 DF, p-value: 2.72e-14
confint(NO2.lm.deaths) #Summary statistics for linear fit.## 2.5 % 97.5 %
## (Intercept) -1.530810e+03 -6.970879e+02
## NO2_Concentration -6.891018e+06 1.499833e+07
## gdp.cap -1.539399e-02 -7.852779e-04
## pop.density -3.077412e-01 -3.762040e-02
## med.age 4.665222e+01 7.812781e+01
#Function for multivariate analysis by Super.region.
cases.NO2 <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
ggplot(aes(x = NO2_Concentration, y = totcases.mil)) +
geom_point(color = "#333aff") +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
labs(title = paste0("COVID-19 Cases per Million People in ", (Super.region = x), " vs. NO2 Concentration"))
}
NO2.lm.cases.reg <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
lm(totcases.mil ~ NO2_Concentration + gdp.cap + pop.density + med.age,.) %>%
summary.lm()
} #Function for linear regression statistics to be included in for loop below.
for (i in regions.list) {
print(cases.NO2(i))
print(NO2.lm.cases.reg(i))
} #Loops function for continents of interest. ##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -80918 -20637 11757 21503 56252
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.415e+05 6.915e+04 2.046 0.0506 .
## NO2_Concentration 6.753e+08 4.508e+08 1.498 0.1457
## gdp.cap -2.958e-01 4.276e-01 -0.692 0.4950
## pop.density -3.902e+00 5.323e+00 -0.733 0.4699
## med.age -2.064e+03 1.714e+03 -1.204 0.2391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37200 on 27 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1368, Adjusted R-squared: 0.008908
## F-statistic: 1.07 on 4 and 27 DF, p-value: 0.3908
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36446 -9842 98 11484 54489
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.364e+05 6.274e+04 -2.174 0.040781 *
## NO2_Concentration 2.462e+09 1.499e+09 1.642 0.114759
## gdp.cap -1.129e+00 9.615e-01 -1.174 0.252926
## pop.density -1.453e+02 3.437e+01 -4.228 0.000346 ***
## med.age 5.968e+03 2.104e+03 2.837 0.009595 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20580 on 22 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.4785, Adjusted R-squared: 0.3837
## F-statistic: 5.046 on 4 and 22 DF, p-value: 0.004862
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12928 -5002 -554 2599 30947
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.130e+04 1.378e+04 -2.996 0.00486 **
## NO2_Concentration -4.610e+08 3.222e+08 -1.431 0.16091
## gdp.cap 2.543e-01 3.588e-01 0.709 0.48300
## pop.density -5.393e+00 1.110e+01 -0.486 0.62984
## med.age 2.794e+03 6.085e+02 4.592 4.94e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7897 on 37 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.5783, Adjusted R-squared: 0.5327
## F-statistic: 12.69 on 4 and 37 DF, p-value: 1.35e-06
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28226 -10085 -1834 7016 51322
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.839e+04 2.766e+04 -1.026 0.320947
## NO2_Concentration 6.601e+08 5.310e+08 1.243 0.232937
## gdp.cap 3.284e-01 1.977e-01 1.661 0.117453
## pop.density 5.675e+01 1.162e+01 4.886 0.000198 ***
## med.age 1.318e+03 1.058e+03 1.245 0.232072
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20020 on 15 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7965, Adjusted R-squared: 0.7423
## F-statistic: 14.68 on 4 and 15 DF, p-value: 4.54e-05
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.033e+06 NaN NaN NaN
## NO2_Concentration -8.654e+10 NaN NaN NaN
## gdp.cap -1.565e+02 NaN NaN NaN
## pop.density 1.933e+02 NaN NaN NaN
## med.age 2.022e+05 NaN NaN NaN
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 4 and 0 DF, p-value: NA
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -69519 -7960 -2246 4542 88232
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.202e+04 4.154e+04 0.289 0.7761
## NO2_Concentration -1.175e+09 1.168e+09 -1.006 0.3293
## gdp.cap 3.167e+00 1.454e+00 2.178 0.0447 *
## pop.density 5.813e+01 2.509e+01 2.317 0.0341 *
## med.age -8.473e+02 2.107e+03 -0.402 0.6929
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31640 on 16 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.5774, Adjusted R-squared: 0.4718
## F-statistic: 5.465 on 4 and 16 DF, p-value: 0.005715
##
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32404 -21359 -7186 14258 90747
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.667e+04 4.108e+04 -2.110 0.0460 *
## NO2_Concentration 8.479e+08 1.654e+09 0.513 0.6130
## gdp.cap 5.136e-01 9.481e-01 0.542 0.5932
## pop.density 1.954e+01 2.755e+02 0.071 0.9441
## med.age 3.267e+03 1.374e+03 2.377 0.0262 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30220 on 23 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4975, Adjusted R-squared: 0.4101
## F-statistic: 5.693 on 4 and 23 DF, p-value: 0.002456
deaths.NO2 <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
ggplot(aes(x = NO2_Concentration, y = totdeaths.mil)) +
geom_point(color = "#333aff") +
geom_smooth(color = "red") +
geom_smooth(method = "lm", color = "purple") +
theme_bw() +
labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. NO2 Concentration"))
}
NO2.lm.deaths.reg <- function(x) {
covid.NO2.df %>%
filter(Super.region == x) %>%
lm(totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density + med.age,.) %>%
summary.lm() #Function for linear regression statistics to be included in for loop below.
}
for (i in regions.list) {
print(deaths.NO2(i))
print(NO2.lm.deaths.reg(i))
} #Loops function for continents of interest.##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1311.58 -566.71 9.46 532.00 1056.38
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.027e+03 1.346e+03 0.762 0.452
## NO2_Concentration 5.949e+06 8.777e+06 0.678 0.504
## gdp.cap -1.140e-02 8.326e-03 -1.369 0.182
## pop.density -7.383e-02 1.037e-01 -0.712 0.482
## med.age 8.725e+00 3.338e+01 0.261 0.796
##
## Residual standard error: 724.2 on 27 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1443, Adjusted R-squared: 0.0175
## F-statistic: 1.138 on 4 and 27 DF, p-value: 0.3598
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1308.9 -446.6 -12.6 258.7 4441.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.529e+03 3.356e+03 -0.753 0.4592
## NO2_Concentration 4.585e+07 8.021e+07 0.572 0.5733
## gdp.cap -3.965e-02 5.143e-02 -0.771 0.4490
## pop.density -4.825e+00 1.839e+00 -2.624 0.0155 *
## med.age 1.417e+02 1.125e+02 1.259 0.2212
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1101 on 22 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.2587, Adjusted R-squared: 0.1239
## F-statistic: 1.919 on 4 and 22 DF, p-value: 0.1428
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -245.62 -64.36 -21.13 51.06 483.56
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.329e+02 2.373e+02 -3.931 0.000357 ***
## NO2_Concentration 1.203e+06 5.549e+06 0.217 0.829502
## gdp.cap 1.500e-03 6.179e-03 0.243 0.809558
## pop.density -4.707e-02 1.911e-01 -0.246 0.806802
## med.age 5.193e+01 1.048e+01 4.956 1.61e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 136 on 37 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.5583, Adjusted R-squared: 0.5105
## F-statistic: 11.69 on 4 and 37 DF, p-value: 3.087e-06
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -386.42 -167.92 -62.21 131.12 585.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.698e+02 4.065e+02 -1.894 0.0777 .
## NO2_Concentration 1.645e+07 7.804e+06 2.108 0.0522 .
## gdp.cap -8.021e-03 2.906e-03 -2.760 0.0146 *
## pop.density 1.067e-01 1.707e-01 0.625 0.5413
## med.age 3.954e+01 1.556e+01 2.542 0.0226 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 294.3 on 15 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5256, Adjusted R-squared: 0.3991
## F-statistic: 4.154 on 4 and 15 DF, p-value: 0.01841
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.834e+04 NaN NaN NaN
## NO2_Concentration -1.223e+09 NaN NaN NaN
## gdp.cap -2.207e+00 NaN NaN NaN
## pop.density 2.771e+00 NaN NaN NaN
## med.age 2.841e+03 NaN NaN NaN
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 4 and 0 DF, p-value: NA
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -284.60 -51.08 7.73 68.13 335.99
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.906e+01 2.084e+02 0.379 0.711
## NO2_Concentration -7.100e+06 6.049e+06 -1.174 0.263
## gdp.cap 1.282e-02 7.330e-03 1.749 0.106
## pop.density 9.886e-02 1.210e-01 0.817 0.430
## med.age -1.136e+00 1.019e+01 -0.112 0.913
##
## Residual standard error: 150.4 on 12 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.4931, Adjusted R-squared: 0.3242
## F-statistic: 2.919 on 4 and 12 DF, p-value: 0.06712
##
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density +
## med.age, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1333.94 -396.57 5.37 308.10 1184.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.037e+03 8.427e+02 -3.603 0.00150 **
## NO2_Concentration 4.754e+07 3.392e+07 1.401 0.17447
## gdp.cap -1.507e-02 1.945e-02 -0.775 0.44625
## pop.density 7.823e-01 5.651e+00 0.138 0.89111
## med.age 9.481e+01 2.819e+01 3.363 0.00269 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 619.8 on 23 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.642, Adjusted R-squared: 0.5797
## F-statistic: 10.31 on 4 and 23 DF, p-value: 6.214e-05
#Getting satellite data specifically for this time frame.
July_Oct2018.sat.r <- raster('US_ground_NO2/US_NO2_2018-07-01_start.tif') %>%
flip(direction='y')
July_Oct2018.sat.r <- crop(July_Oct2018.sat.r, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
July_Oct2018.sat.r <- mask(July_Oct2018.sat.r, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
#Direct conversion from NO2 raster to data frame while filtering by shapefile.
continental.US.fxn <- function(x) {
r1 <- raster(x) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
r1 <- mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r1 <- rasterToPoints(r1, spatial = TRUE)
July_Oct2018.sat <- data.frame(r1) %>%
select(!("optional")) %>%
select( "y", "x", "US_NO2_2018.07.01_start")
July_Oct2018.sat <- rename(July_Oct2018.sat, Latitude = y, Longitude = x, NO2_concentration = US_NO2_2018.07.01_start)
assign("July_Oct2018.sat", July_Oct2018.sat, .GlobalEnv)
} #Final satellite data to use for comparison to EPA data.
continental.US.fxn("US_ground_NO2/US_NO2_2018-07-01_start.tif")
#Selecting just latitude and longitude from data frame just made to use with Pargasite retrieval.
US.lat.lon <- July_Oct2018.sat %>% select(c(Latitude, Longitude))
# write.csv(US.lat.lon, file.path("US_ground_NO2", "US_lat_lon.csv"), row.names = FALSE) #Used to gather EPA data from Pargasite.
#Reading in EPA data
#Pargasite: only overlapping time frame is July 2018 - October 2018 (Pargasite stops working in November 2018)
July_Oct2018.ground <- read.csv(file = './US_ground_NO2/July_Oct2018NO2.csv') %>% select(c("Latitude", "Longitude", "no2_estimate")) %>% rename(NO2_concentration = no2_estimate)
July_Oct2018.ground <- data.frame(July_Oct2018.ground) #Final EPA data to use in comparison to satellite data.
#EPA data frame changed to raster format.
July_Oct2018.ground.r <- July_Oct2018.ground %>% select(c("Longitude", "Latitude", "NO2_concentration")) %>% rasterFromXYZ() #Resampling raster data to be same size for comparisons.
July_Oct2018.sat.r.re <- July_Oct2018.sat.r %>% resample(July_Oct2018.ground.r)
#Perform correlation test between satellite and ground rasters.
cor(values(July_Oct2018.sat.r.re),
values(July_Oct2018.ground.r),
use = "na.or.complete")## [1] 0.03616317
#Linear model estimation.
lm1 <- lm(values(July_Oct2018.sat.r.re) ~ values(July_Oct2018.ground.r))
summary(lm1)##
## Call:
## lm(formula = values(July_Oct2018.sat.r.re) ~ values(July_Oct2018.ground.r))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.184e-05 -5.636e-06 -7.730e-07 3.633e-06 4.478e-05
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.152e-05 8.089e-07 26.603 <2e-16 ***
## values(July_Oct2018.ground.r) 1.211e-07 1.173e-07 1.032 0.302
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.48e-06 on 813 degrees of freedom
## (417 observations deleted due to missingness)
## Multiple R-squared: 0.001308, Adjusted R-squared: 7.937e-05
## F-statistic: 1.065 on 1 and 813 DF, p-value: 0.3025
#Local correlation estimates--shows where locally on map specific regions are positively or negatively correlated. (Getting an error: try to replicate code from this website https://statnmap.com/2018-01-27-spatial-correlation-between-rasters/)ggplot() +
geom_raster(data = July_Oct2018.sat, aes(x = Longitude, y = Latitude, fill = NO2_concentration)) +
geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="United States", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
my_theme() +
ggtitle("NO2 US Satellite Data, July-Oct 2018") +
#scale_fill_manual(breaks = c(0, 0.0000025, 0.000005, 0.0000075, 0.00001, 0.00002)) +
scale_fill_gradientn(name = "NO2 Concentration \n(mol/m^2)", colours = myPalette(100)) +
north(x.min = -130, y.min = 23, x.max = -65, y.max = 49, anchor = c(x=-119, y=32), symbol=12) +
scalebar(x.min = -130, y.min = 23, x.max = -65, y.max = 49, dist = 200, dist_unit = 'km', transform = TRUE, anchor = c(x=-120, y=27.5), st.size = 1.5, model = 'WGS84') +
xlim(-130,-65) +
ylim(23,49) +
coord_quickmap()ggplot() +
geom_raster(data = July_Oct2018.ground, aes(x = Longitude, y = Latitude, fill = NO2_concentration)) +
geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="United States", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
my_theme() +
ggtitle("NO2 US EPA Ground Data, July-Oct 2018") +
#scale_fill_manual(breaks = c(0, 0.0000025, 0.000005, 0.0000075, 0.00001, 0.00002)) +
scale_fill_gradientn(name = "NO2 Concentration \n(ppb)", colours = myPalette(100)) +
north(x.min = -130, y.min = 23, x.max = -65, y.max = 49, anchor = c(x=-119, y=32), symbol=12) +
scalebar(x.min = -130, y.min = 23, x.max = -65, y.max = 49, dist = 200, dist_unit = 'km', transform = TRUE, anchor = c(x=-120, y=27.5), st.size = 1.5, model = 'WGS84') +
xlim(-130,-65) +
ylim(23,49) +
coord_quickmap()#No success in retrieving data so far due to lack of shapefiles. #Unsuccessful without ArcGIS access.
london.shp <- st_read("six_cities_shapefiles/london/ESRI/London_Borough_Excluding_MHW.shp") # Read shapefile as an sf object## Reading layer `London_Borough_Excluding_MHW' from data source `/Users/alanaschreibman/COVID-19_NO2/six_cities_shapefiles/london/ESRI/London_Borough_Excluding_MHW.shp' using driver `ESRI Shapefile'
## Simple feature collection with 33 features and 7 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## Projected CRS: OSGB 1936 / British National Grid
class(london.shp)## [1] "sf" "data.frame"
london.shp <- st_geometry(london.shp)
plot(london.shp)seattle.shp <- st_read("six_cities_shapefiles/seattle/WA.shp") # Read shapefile as an sf object## Reading layer `WA' from data source `/Users/alanaschreibman/COVID-19_NO2/six_cities_shapefiles/seattle/WA.shp' using driver `ESRI Shapefile'
## Simple feature collection with 39 features and 48 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -124.7314 ymin: 45.54325 xmax: -116.9182 ymax: 49
## CRS: NA
class(seattle.shp)## [1] "sf" "data.frame"
seattle.shp <- st_geometry(seattle.shp)
plot(seattle.shp)#Unsuccessful: cannot find shapefiles for the selected cities without ArcGIS access.
#Read in world AQI data for 6 cities. Dates range 12/30/19 to 4/5/20.
waqi.main <- read.csv(file = 'waqi_NO2.csv', header = TRUE)
seattle.df <- filter(waqi.main, City == "Seattle")
seattle.df$Date <- as.Date(seattle.df$Date, format = "%m/%d/%y")
seattle.df <- arrange(seattle.df, Date)
wuhan.df <- filter(waqi.main, City == "Wuhan")
wuhan.df$Date <- as.Date(wuhan.df$Date, format = "%m/%d/%y")
wuhan.df <- arrange(wuhan.df, Date)
milan.df <- filter(waqi.main, City == "Milan")
milan.df$Date <- as.Date(milan.df$Date, format = "%m/%d/%y")
milan.df <- arrange(milan.df, Date)
london.df <- filter(waqi.main, City == "London")
london.df$Date <- as.Date(london.df$Date, format = "%m/%d/%y")
london.df <- arrange(london.df, Date)
saopaulo.df <- filter(waqi.main, City == "Sao Paulo")
saopaulo.df$Date <- as.Date(saopaulo.df$Date, format = "%m/%d/%y")
saopaulo.df <- arrange(saopaulo.df, Date)
johannesburg.df <- filter(waqi.main, City == "Johannesburg")
johannesburg.df$Date <- as.Date(johannesburg.df$Date, format = "%m/%d/%y")
johannesburg.df <- arrange(johannesburg.df, Date)
dates.2020.Q1 <- seq(as.Date("2019-12-30"), as.Date("2020-04-05"), by=7) #List of dates starting Dec 30th 2019.
#Averaging NO2 data by the week.
#Seattle
seattle.ground.fxn <- function(x) {
date.filter <- subset(seattle.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
mean <- mean(date.filter$median)
seattle.avg.df <- data.frame(mean)
names(seattle.avg.df)[1] <- "Mean_NO2_Concentration"
Week_start_date <- as.Date(dates.2020.Q1[[x]])
Type <- rep("satellite", length.out = nrow(seattle.avg.df))
seattle.ground.weekly <- cbind(Week_start_date, Type, seattle.avg.df)
seattle.ground.weekly <- seattle.ground.weekly %>% dplyr::select(Type, Week_start_date, Mean_NO2_Concentration)
}
x <- seq_along(dates.2020.Q1)
seattle.ground.weekly.df <- purrr::map_dfr(x, seattle.ground.fxn)
#Wuhan
wuhan.ground.NO2.fxn <- function(x) {
date.filter <- subset(wuhan.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
mean <- mean(date.filter$median)
date.start <- as.Date(dates.2020.Q1[[x]])
wuhan.avg.df <- data.frame(date.start, wuhan.df[1,3], mean)
names(wuhan.avg.df)[1] <- "Week_start_date"
names(wuhan.avg.df)[2] <- "City"
names(wuhan.avg.df)[3] <- "Mean_of_Median_NO2"
output <- wuhan.avg.df
return(output)
}
wuhan.ground.df <- purrr::map_dfr(x, wuhan.ground.NO2.fxn)
#Milan
milan.ground.NO2.fxn <- function(x) {
date.filter <- subset(milan.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
mean <- mean(date.filter$median)
date.start <- as.Date(dates.2020.Q1[[x]])
milan.avg.df <- data.frame(date.start, milan.df[1,3], mean)
names(milan.avg.df)[1] <- "Week_start_date"
names(milan.avg.df)[2] <- "City"
names(milan.avg.df)[3] <- "Mean_of_Median_NO2"
output <- milan.avg.df
return(output)
}
milan.ground.df <- purrr::map_dfr(x, milan.ground.NO2.fxn)
#London
london.ground.fxn <- function(x) {
date.filter <- subset(london.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
mean <- mean(date.filter$median)
london.avg.df <- data.frame(mean)
names(london.avg.df)[1] <- "Mean_NO2_Concentration"
Week_start_date <- as.Date(dates.2020.Q1[[x]])
Type <- rep("satellite", length.out = nrow(london.avg.df))
london.ground.weekly <- cbind(Week_start_date, Type, london.avg.df)
london.ground.weekly <- london.ground.weekly %>% dplyr::select(Type, Week_start_date, Mean_NO2_Concentration)
}
x <- seq_along(dates.2020.Q1)
london.ground.weekly <- purrr::map_dfr(x, london.ground.fxn)
#Sao Paulo
saopaulo.ground.NO2.fxn <- function(x) {
date.filter <- subset(saopaulo.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
mean <- mean(date.filter$median)
date.start <- as.Date(dates.2020.Q1[[x]])
saopaulo.avg.df <- data.frame(date.start, saopaulo.df[1,3], mean)
names(saopaulo.avg.df)[1] <- "Week_start_date"
names(saopaulo.avg.df)[2] <- "City"
names(saopaulo.avg.df)[3] <- "Mean_of_Median_NO2"
output <- saopaulo.avg.df
return(output)
}
saopaulo.ground.df <- purrr::map_dfr(x, saopaulo.ground.NO2.fxn)
#Jonannesburg
jonannesburg.ground.NO2.fxn <- function(x) {
date.filter <- subset(johannesburg.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
mean <- mean(date.filter$median)
date.start <- as.Date(dates.2020.Q1[[x]])
johannesburg.avg.df <- data.frame(date.start, johannesburg.df[1,3], mean)
names(johannesburg.avg.df)[1] <- "Week_start_date"
names(johannesburg.avg.df)[2] <- "City"
names(johannesburg.avg.df)[3] <- "Mean_of_Median_NO2"
output <- johannesburg.avg.df
return(output)
}
jonannesburg.ground.df <- purrr::map_dfr(x, jonannesburg.ground.NO2.fxn)
#Retrieving satellite data:
#Used code:
#gsutil -m cp \
# > "file names" \
# > COVID-19_NO2/six_cities_ground_NO2
# in terminal to copy all files from google cloud storage into folders in working directory.
#Plan:
#get shapefiles for each city
#use shapefiles to filter satellite data for each city and make data frame equivalent to ground data per week
#make graphs for each comparing the two data sets
#Creates list of raster files from folder to call in function.
seattle.rastlist <- list.files(path = "six_cities_ground_NO2/seattle", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Function to make data frame for weekly average NO2 concentration.
seattle.sat.fxn <- function(x, y) {
r <- raster(paste0("six_cities_ground_NO2/seattle/", x)) %>% flip(direction='y')
r.vals <- raster::extract(r, `seattle.shp`)
r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
final.df <- data.frame(`seattle.shp`@data, r.mean)
Week_start_date <- as.Date(dates.2020.Q1[[y]])
Type <- rep(satellite, length.out = nrow(final.df))
seattle.sat.weekly <- cbind(final.df, Date, Week, Type)
seattle.sat.weekly <- seattle.sat.weekly %>% dplyr::mutate(Date = as.factor(strftime(as.Date(Date, format="%Y-%m-%d"), format="%m-%d")))
names(seattle.sat.weekly)[11] <- "Mean_NO2_Concentration"
seattle.sat.weekly <- seattle.sat.weekly %>% select(Type, Date, Mean_NO2_Concentration)
}
x <- seattle.rastlist
y <- 1:length(dates.2020.Q1)
seattle.sat.weekly.df <- purrr::map2_dfr(x, y, seattle.sat.fxn)
seattle.weekly.df <- rbind(seattle.sat.weekly.df, seattle.ground.weekly.df)
london.rastlist <- list.files(path = "six_cities_ground_NO2/london", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
london.sat.fxn <- function(x, y) {
r <- raster(paste0("six_cities_ground_NO2/london/", x)) %>% flip(direction='y')
r.vals <- raster::extract(r, `london.shp`)
r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
final.df <- data.frame(r.mean)
Week_start_date <- as.Date(dates.2020.Q1[[y]])
Type <- rep("satellite", length.out = nrow(final.df))
london.sat.weekly <- cbind(final.df, Week_start_date, Type)
names(london.sat.weekly)[1] <- "Mean_NO2_Concentration"
london.sat.weekly <- london.sat.weekly %>% dplyr::select(Type, Week_start_date, Mean_NO2_Concentration)
}
x <- london.rastlist
y <- 1:length(dates.2020.Q1)
london.sat.weekly <- purrr::map2_dfr(x, y, london.sat.fxn)
london.weekly.df <- rbind(london.sat.weekly,london.ground.weekly.df) #Use function in other NO2_Exploratory Analysis file to filter out each week from Jan-Dec 2019 and Jan-Dec 2020 for comparison.
#Used code:
# gsutil -m cp \
# > "file names" \
# > COVID-19_NO2/US_time_analysis_2020
# in terminal to copy all files from google cloud storage into folders in working directory.
#2019 data.
#Creates list of raster files from folder to call in function.
rastlist.2019 <- list.files(path = "US_time_analysis_2019", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2019 <- seq(as.Date("2018-12-31"), as.Date("2019-12-29"), by=7) #List of dates starting Dec 31 2018.
#Function to make data frame for weekly average NO2 concentration.
read.fxn <- function(x, y) {
r <- raster(paste0("US_time_analysis_2019/", x)) %>% flip(direction='y')
r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
final.df <- data.frame(wrld_simpl[wrld_simpl@data$NAME=="United States", ]@data, r.mean)
Date <- as.Date(dates.2019[[y]])
Week <- paste0(as.character(as.Date(dates.2019[[y]])), " to ", as.character(as.Date(dates.2019[[y]]) + 6))
Period <- rep(2019, length.out = nrow(final.df))
US.2019.weekly <- cbind(final.df, Date, Week, Period)
US.2019.weekly <- US.2019.weekly %>% dplyr::mutate(Date = as.factor(strftime(as.Date(Date, format="%Y-%m-%d"), format="%m-%d")))
rm(r.mean, r.vals)
names(US.2019.weekly)[12] <- "NO2_Concentration"
US.2019.weekly <- US.2019.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x <- rastlist.2019
y <- 1:length(dates.2019)
US.2019.weekly.df <- purrr::map2_dfr(x, y, read.fxn)
US.2019.weekly.df$Date <- paste0("2020-", US.2019.weekly.df$Date)
#2020 data.
#Creates list of raster files from folder to call in function.
rastlist.2020 <- list.files(path = "US_time_analysis_2020", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2020 <- seq(as.Date("2019-12-30"), as.Date("2021-01-03"), by=7) #List of dates starting Dec 30th 2019.
#Function to make data frame for weekly average NO2 concentration.
read.fxn <- function(x, y) {
r <- raster(paste0("US_time_analysis_2020/", x)) %>% flip(direction='y')
r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
final.df <- data.frame(wrld_simpl[wrld_simpl@data$NAME=="United States", ]@data, r.mean)
Date <- as.factor(as.Date(dates.2020[[y]]))
Week <- paste0(as.character(as.Date(dates.2020[[y]])), " to ", as.character(as.Date(dates.2020[[y]]) + 6))
Period <- rep(2020, length.out = nrow(final.df))
US.2020.weekly <- cbind(final.df, Date, Week, Period) # Add new column to data
rm(r.mean, r.vals)
names(US.2020.weekly)[12] <- "NO2_Concentration"
US.2020.weekly <- US.2020.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x <- rastlist.2020
y <- 1:length(dates.2020)
US.2020.weekly.df <- purrr::map2_dfr(x, y, read.fxn)
US.weekly.df <- rbind(US.2019.weekly.df, US.2020.weekly.df)
US.weekly.df$Date <- as.Date(US.weekly.df$Date)
US.weekly.df$Period <- as.factor(US.weekly.df$Period)ggplot(US.weekly.df, aes(x = Date, y = NO2_Concentration, group = Period, color = Period)) +
geom_line(size = 0.75) +
theme(legend.key.size = unit(1, 'cm'),
legend.title = element_text(size=14),
legend.text = element_text(size=10),
axis.title=element_text(size = 20),
axis.text.y=element_text(size=24),
axis.text.x=element_text(size=30,angle=90, hjust=1)) +
labs(fill = "Year of Measured Pollution") +
ggtitle("US NO2 in 2019 vs 2020") +
ylab("NO2 Concentration (mol/m^2)") +
scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2019-12-25",format="%Y-%m-%d"),as.Date("2020-12-31",format="%Y-%m-%d"))) +
ylim(0, 3E-5) +
#Pre-lockdown
geom_rect(data=US.weekly.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-03-19",format="%Y-%m-%d"),ymin=2.9E-5,ymax=3E-5),fill = "#c0c0c0",color = NA, alpha=0.02) +
geom_text(data=US.weekly.df,aes(x=as.Date("2020-02-07"),y=2.95E-5),label="Pre-Lockdown",color = "black", size = 2) +
geom_vline(data=US.weekly.df,xintercept = as.Date("2020-03-19",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 1
geom_rect(data=US.weekly.df,aes(xmin=as.Date("2020-03-19",format="%Y-%m-%d"),xmax=as.Date("2020-06-05",format="%Y-%m-%d"),ymin=2.9E-5,ymax=3E-5),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=US.weekly.df,aes(x=as.Date("2020-05-01"),y=2.95E-5),label="Lockdown 1", color = "black", size = 2) +
geom_vline(data=US.weekly.df,xintercept=as.Date("2020-06-05",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Re-opening Green PHASE
geom_rect(data=US.weekly.df,aes(xmin=as.Date("2020-06-05",format="%Y-%m-%d"),xmax=as.Date("2020-11-16",format="%Y-%m-%d"), ymin=2.9E-5,ymax=3E-5),fill = "#b5dea5",color = NA,alpha=0.02) +
geom_text(data=US.weekly.df,aes(x=as.Date("2020-08-15"),y=2.95E-5),label="Re-Opening", color = "black", size = 2) +
geom_vline(data=US.weekly.df,xintercept=as.Date("2020-11-16",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 2
geom_rect(data=US.weekly.df,aes(xmin=as.Date("2020-11-16",format="%Y-%m-%d"),xmax=as.Date("2020-12-31",format="%Y-%m-%d"), ymin=2.9E-5,ymax=3E-5),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=US.weekly.df,aes(x=as.Date("2020-12-10"),y=2.95E-5),label="Lockdown 2", color = "black", size = 2) +
theme_bw() +
scale_alpha_manual(values = c(0.4, 1.1)) +
scale_color_manual(values = c("gray", "blue", "black")) +
ggsave("US_time_series.pdf")#Use function in other NO2_Exploratory Analysis file to filter out each week from Jan-Dec 2019 and Jan-Dec 2020 for comparison.
#Used code:
# gsutil -m cp \
# > "file names" \
# > COVID-19_NO2/China_time_analysis_2020
# in terminal to copy all files from google cloud storage into folders in working directory.
#2019 data.
#Creates list of raster files from folder to call in function.
rastlist.2019 <- list.files(path = "China_time_analysis_2019", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2019 <- seq(as.Date("2018-11-05"), as.Date("2019-11-10"), by=7) #List of dates starting Dec 31 2018.
#Function to make data frame for weekly average NO2 concentration.
read.fxn <- function(x, y) {
r <- raster(paste0("China_time_analysis_2019/", x)) %>% flip(direction='y')
r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
final.df <- data.frame(wrld_simpl[wrld_simpl@data$NAME=="China", ]@data, r.mean)
Date <- as.factor(as.Date(dates.2019[[y]] %m+% years(1))) #Adds 1 year to date to match x axis of 2020 dates.
Week <- paste0(as.character(as.Date(dates.2019[[y]])), " to ", as.character(as.Date(dates.2019[[y]]) + 6))
Period <- rep("2018-2019", length.out = nrow(final.df))
China.2019.weekly <- cbind(final.df, Date, Week, Period)
rm(r.mean, r.vals)
names(China.2019.weekly)[12] <- "NO2_Concentration"
China.2019.weekly <- China.2019.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x <- rastlist.2019
y <- 1:length(dates.2019)
China.2019.weekly.df <- purrr::map2_dfr(x, y, read.fxn)
#2020 data.
#Creates list of raster files from folder to call in function.
rastlist.2020 <- list.files(path = "China_time_analysis_2020", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2020 <- seq(as.Date("2019-11-04"), as.Date("2020-11-08"), by=7) #List of dates starting Dec 30th 2019.
#Function to make data frame for weekly average NO2 concentration.
read.fxn <- function(x, y) {
r <- raster(paste0("China_time_analysis_2020/", x)) %>% flip(direction='y')
r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
final.df <- data.frame(wrld_simpl[wrld_simpl@data$NAME=="China", ]@data, r.mean)
Date <- as.factor(as.Date(dates.2020[[y]]))
Week <- paste0(as.character(as.Date(dates.2020[[y]])), " to ", as.character(as.Date(dates.2020[[y]]) + 6))
Period <- rep("2019-2020", length.out = nrow(final.df))
China.2020.weekly <- cbind(final.df, Date, Week, Period) # Add new column to data
rm(r.mean, r.vals)
names(China.2020.weekly)[12] <- "NO2_Concentration"
China.2020.weekly <- China.2020.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x <- rastlist.2020
y <- 1:length(dates.2020)
China.2020.weekly.df <- purrr::map2_dfr(x, y, read.fxn)
China.weekly.df <- rbind(China.2019.weekly.df, China.2020.weekly.df)
China.weekly.df$Date <- as.Date(China.weekly.df$Date)
China.weekly.df$Period <- as.factor(China.weekly.df$Period)
max(China.weekly.df$NO2_Concentration)## [1] 5.822227e-05
ggplot(China.weekly.df, aes(x = Date, y = NO2_Concentration, group = Period, color = Period)) +
geom_line(size = 0.75) +
labs(fill = "Year of Measured Pollution") +
ggtitle("China NO2 in 2019 vs 2020") +
ylab("NO2 Concentration (mol/m^2)") +
scale_x_date(date_labels = "%b-%Y",date_breaks = "1 month",limits=c(as.Date("2019-11-01",format="%Y-%m-%d"),as.Date("2020-11-10",format="%Y-%m-%d"))) +
ylim(0,4.7E-5) +
theme_bw() +
theme(legend.key.size = unit(1, 'cm'),
legend.title = element_text(size=12),
legend.text = element_text(size=10),
axis.title = element_text(size = 10),
axis.text.x = element_text(size=10, angle=45, hjust=1),
axis.text.y = element_text(size=10)) +
#Pre-lockdown
geom_rect(data=China.weekly.df,aes(xmin=as.Date("2019-11-01",format="%Y-%m-%d"),xmax=as.Date("2020-01-23",format="%Y-%m-%d"),ymin=4.6E-5,ymax=4.7E-5),fill = "#c0c0c0",color = NA, alpha=0.02) +
geom_text(data=China.weekly.df,aes(x=as.Date("2019-12-15"),y=4.65E-5),label="Pre-Lockdown",color = "black", size = 2) +
geom_vline(data=China.weekly.df,xintercept = as.Date("2020-01-23",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Lockdown 1
geom_rect(data=China.weekly.df,aes(xmin=as.Date("2020-01-23",format="%Y-%m-%d"),xmax=as.Date("2020-04-08",format="%Y-%m-%d"),ymin=4.6E-5,ymax=4.7E-5),fill = "#fa9796",color = NA,alpha=0.02) +
geom_text(data=China.weekly.df,aes(x=as.Date("2020-03-01"),y=4.65E-5),label="Lockdown 1", color = "black", size = 2) +
geom_vline(data=China.weekly.df,xintercept=as.Date("2020-04-08",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) +
#Re-opening Green PHASE
geom_rect(data=China.weekly.df,aes(xmin=as.Date("2020-04-08",format="%Y-%m-%d"),xmax=as.Date("2020-11-08",format="%Y-%m-%d"), ymin=4.6E-5,ymax=4.7E-5),fill = "#b5dea5",color = NA,alpha=0.02) +
geom_text(data=China.weekly.df,aes(x=as.Date("2020-08-01"),y=4.65E-5),label="Re-Opening", color = "black", size = 2) +
scale_alpha_manual(values = c(0.4, 1.1)) +
scale_color_manual(values = c("gray", "blue", "black")) +
ggsave("China_time_series.pdf")#Use function in other NO2_Exploratory Analysis file to filter out each week from Jan-Dec 2019 and Jan-Dec 2020 for comparison.
#Unsuccessful so far: need to remove inner country boundaries from shapefile and use only world outline.
#Used code:
#gsutil -m cp \
# > "file names" \
# > COVID-19_NO2/time_analysis_2020
# in terminal to copy all files from google cloud storage into folders in working directory.
#2019 data.
#Creates list of raster files from folder to call in function.
rastlist.2019.left <- list.files(path = "time_analysis_2019_left", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
rastlist.2019.right <- list.files(path = "time_analysis_2019_right", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2019 <- seq(as.Date("2018-12-31"), as.Date("2019-12-29"), by=7) #List of dates starting Dec 31 2018.
#Function to make data frame for weekly average NO2 concentration.
read.fxn <- function(x1, x2, y) {
left <- raster(paste0("time_analysis_2019_left/", x1)) %>%
flip(direction='y')
right <- raster(paste0("time_analysis_2019_right/", x2)) %>%
flip(direction='y')
x <- list(left, right)
names(x)[1:2] <- c('x', 'y')
x$fun <- mean
x$na.rm <- TRUE
world.NO2.r <- do.call(mosaic, x) %>% flip(direction='y') #Combines left and right halves of world raster.
r.vals <- raster::extract(world.NO2.r, wrld_simpl)
r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
final.df <- data.frame(wrld_simpl@data, r.mean)
Date <- as.Date(dates.2019[[y]])
Week <- paste0(as.character(as.Date(dates.2019[[y]])), " to ", as.character(as.Date(dates.2019[[y]]) + 6))
Period <- rep(2019, length.out = nrow(final.df))
world.2019.weekly <- cbind(final.df, Date, Week, Period)
world.2019.weekly <- world.2019.weekly %>% dplyr::mutate(Date = as.factor(strftime(as.Date(Date, format="%Y-%m-%d"), format="%m-%d")))
rm(r.mean, r.vals)
names(world.2019.weekly)[12] <- "NO2_Concentration"
world.2019.weekly <- world.2019.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x1 <- rastlist.2019.left
x2 <- rastlist.2019.right
y <- 1:length(dates.2019)
world.2019.weekly.df <- purrr::map2_dfr(x1, x2, y, read.fxn)
world.2019.weekly.df$Date <- paste0("2020-", world.2019.weekly.df$Date)
#2020 data.
#Creates list of raster files from folder to call in function.
rastlist.2020.left <- list.files(path = "time_analysis_2020_left", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
rastlist.2020.right <- list.files(path = "time_analysis_2020_right", pattern='.tif$',
all.files=TRUE, full.names=FALSE)
#Creates list of dates to call in function.
dates.2020 <- seq(as.Date("2019-12-30"), as.Date("2021-01-03"), by=7) #List of dates starting Dec 30th 2019.
#Function to make data frame for weekly average NO2 concentration.
read.fxn <- function(x1, x2, y) {
left <- raster(paste0("time_analysis_2020_left/", x1)) %>%
flip(direction='y')
right <- raster(paste0("time_analysis_2020_right/", x2)) %>%
flip(direction='y')
x <- list(left, right)
names(x)[1:2] <- c('x', 'y')
x$fun <- mean
x$na.rm <- TRUE
world.NO2.r <- do.call(mosaic, x) %>% flip(direction='y') #Combines left and right halves of world raster.
r.vals <- raster::extract(r, wlrd_simpl)
r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
final.df <- data.frame(wrld_simpl@data, r.mean)
Date <- as.factor(as.Date(dates.2020[[y]]))
Week <- paste0(as.character(as.Date(dates.2020[[y]])), " to ", as.character(as.Date(dates.2020[[y]]) + 6))
Period <- rep(2020, length.out = nrow(final.df))
world.2020.weekly <- cbind(final.df, Date, Week, Period) # Add new column to data
rm(r.mean, r.vals)
names(world.2020.weekly)[12] <- "NO2_Concentration"
world.2020.weekly <- world.2020.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
x1 <- rastlist.2020.left
x2 <- rastlist.2020.right
y <- 1:length(dates.2020)
world.2020.weekly.df <- purrr::map2_dfr(x1, x2, y, read.fxn)
world.weekly.df <- rbind(world.2019.weekly.df, world.2020.weekly.df)
world.weekly.df$Date <- as.Date(world.weekly.df$Date)
world.weekly.df$Period <- as.factor(world.weekly.df$Period)#Converting before lockdown and after lockdown rasters for US to data frames.
before.lockdown <- raster('./US_NO2_comparison/US_NO2_2020-01-19_start.tif') %>%
flip(direction='y') %>%
rasterToPoints(spatial = TRUE) %>%
data.frame()
before.lockdown <- before.lockdown %>%
rename(c(NO2_concentration = US_NO2_2020.01.19_start, Longitude = x, Latitude = y)) %>%
select(c("Latitude", "Longitude", "NO2_concentration"))
after.lockdown <- raster('./US_NO2_comparison/US_NO2_2020-03-19_start.tif') %>%
flip(direction='y') %>%
rasterToPoints(spatial = TRUE) %>%
data.frame()
after.lockdown <- after.lockdown %>%
rename(c(NO2_concentration = US_NO2_2020.03.19_start, Longitude = x, Latitude = y)) %>%
select(c("Latitude", "Longitude", "NO2_concentration"))
#Plotting before and after maps.
p1 <- ggplot() +
geom_raster(data = before.lockdown , aes(x = Longitude, y = Latitude, fill = NO2_concentration)) +
geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="United States", ], aes(x = long, y = lat, group = group), colour = "black", fill = NA) +
coord_quickmap(
xlim = c(-123, -65),
ylim = c(23, 49)
) +
labs(fill = "NO2 Concentration \n(mol/m^2)") +
scale_fill_viridis_c(limits = c(0, 0.00015)) +
ggtitle("US NO2 Concentrations (Sentinel Satellite) Before COVID-19 Lockdown")
p2 <- ggplot() +
geom_raster(data = after.lockdown , aes(x = Longitude, y = Latitude, fill = NO2_concentration)) +
geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="United States", ], aes(x = long, y = lat, group = group), colour = "black", fill = NA) +
coord_quickmap(
xlim = c(-123, -65),
ylim = c(23, 49)
) +
labs(fill = "NO2 Concentration \n(mol/m^2)") +
scale_fill_viridis_c(limits = c(0, 0.00015)) +
ggtitle("US NO2 Concentrations (Sentinel Satellite) After COVID-19 Lockdown")
plot_grid(p1, p2, labels = "AUTO", ncol = 1)rm(p1, p2)#Function for finding medians over each time period.
US_NO2_median <- function(x) {
r <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
lapply(r.vals, FUN=median, na.rm = TRUE)
}
US_NO2_median("US_NO2_2020-01-19_start.tif")## [[1]]
## [1] 1.965982e-05
US_NO2_median("US_NO2_2020-03-19_start.tif")## [[1]]
## [1] 1.799675e-05
#Function for finding averages over each time period.
US_NO2_average <- function(x) {
r <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
lapply(r.vals, FUN=mean, na.rm = TRUE)
}
US_NO2_average("US_NO2_2020-01-19_start.tif")## [[1]]
## [1] 2.182116e-05
US_NO2_average("US_NO2_2020-03-19_start.tif")## [[1]]
## [1] 1.934275e-05
#2019 violin plot
US_2019_violin <- function(x, y) {
r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2019", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("March-May 2019", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
df <- rbind(r1, r2)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('US NO2: 2019 Seasonal Change') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
US_2019_violin("US_NO2_2019-01-19_start.tif", "US_NO2_2019-03-19_start.tif")#2020 violin plot
US_2020_violin <- function(x, y) {
r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2020", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("March-May 2020", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
df <- rbind(r1, r2)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('US NO2 Before and After COVID-19') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
US_2020_violin("US_NO2_2020-01-19_start.tif", "US_NO2_2020-03-19_start.tif")#2021 violin plot
US_2021_violin <- function(x, y) {
r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2021", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("March-May 2021", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
df <- rbind(r1, r2)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('US NO2: 2021 Seasonal Change') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
US_2021_violin("US_NO2_2021-01-19_start.tif", "US_NO2_2021-03-19_start.tif")#All violin plots in 1 graph:
US_violin <- function(a, b, c, d, e, f) {
r1 <- raster(paste0("./US_NO2_Comparison/", (a))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2019", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./US_NO2_Comparison/", (b))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("March-May 2019", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
r3 <- raster(paste0("./US_NO2_Comparison/", (c))) %>% flip(direction='y')
r3 <- crop(r3, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r3, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r3 <- r3 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2020", length.out = nrow(r3)) %>% as.data.frame()
r3 <- cbind(vec, r3)
names(r3)[1] <- "Timeframe"
names(r3)[4] <- "NO2_Concentration"
r4 <- raster(paste0("./US_NO2_Comparison/", (d))) %>% flip(direction='y')
r4 <- crop(r4, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r4, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r4 <- r4 %>% rasterToPoints() %>% data.frame()
vec <- rep("March-May 2020", length.out = nrow(r4)) %>% as.data.frame()
r4 <- cbind(vec, r4)
names(r4)[1] <- "Timeframe"
names(r4)[4] <- "NO2_Concentration"
r5 <- raster(paste0("./US_NO2_Comparison/", (e))) %>% flip(direction='y')
r5 <- crop(r5, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r5, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r5 <- r5 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2021", length.out = nrow(r5)) %>% as.data.frame()
r5 <- cbind(vec, r5)
names(r5)[1] <- "Timeframe"
names(r5)[4] <- "NO2_Concentration"
r6 <- raster(paste0("./US_NO2_Comparison/", (f))) %>% flip(direction='y')
r6 <- crop(r6, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r6, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r6 <- r6 %>% rasterToPoints() %>% data.frame()
vec <- rep("March-May 2021", length.out = nrow(r6)) %>% as.data.frame()
r6 <- cbind(vec, r6)
names(r6)[1] <- "Timeframe"
names(r6)[4] <- "NO2_Concentration"
df <- rbind(r1, r2, r3, r4, r5, r6)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('US NO2: 2019 Seasonal Change') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
theme(axis.text.x = element_text(angle = 45)) +
scale_x_discrete(limits=c("Jan-March 2019", "March-May 2019", "Jan-March 2020", "March-May 2020", "Jan-March 2021", "March-May 2021")) +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
US_violin("US_NO2_2019-01-19_start.tif", "US_NO2_2019-03-19_start.tif", "US_NO2_2020-01-19_start.tif", "US_NO2_2020-03-19_start.tif", "US_NO2_2021-01-19_start.tif", "US_NO2_2021-03-19_start.tif")US_2019_boxplot <- function(x, y) {
r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
s <- stack(r1, r2)
boxplot(s, col=c('red', 'blue'),
main='US NO2: 2019 Seasonal Change',
ylab='Date',
names = c('2019 Jan-Mar', '2019 Mar-May'))
}
US_2019_boxplot("US_NO2_2019-01-19_start.tif", "US_NO2_2019-03-19_start.tif")US_2020_boxplot <- function(x, y) {
r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
s <- stack(r1, r2)
boxplot(s, col=c('red', 'blue'),
main='US NO2 Before and After COVID-19',
ylab='Date',
names = c('2020 Jan-Mar', '2020 Mar-May'))
}
US_2020_boxplot("US_NO2_2020-01-19_start.tif", "US_NO2_2020-03-19_start.tif")US_2021_boxplot <- function(x, y) {
r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
s <- stack(r1, r2)
boxplot(s, col=c('red', 'blue'),
main='US NO2: 2021 Seasonal Change',
ylab='Date',
names = c('2021 Jan-Mar', '2021 Mar-May'))
}
US_2021_boxplot("US_NO2_2021-01-19_start.tif", "US_NO2_2021-03-19_start.tif")#Converting before lockdown and after lockdown rasters for US to data frames.
before.lockdown <- raster('./China_NO2_comparison/China_NO2_2019-11-23_start.tif') %>%
flip(direction='y') %>%
rasterToPoints(spatial = TRUE) %>%
data.frame()
before.lockdown <- before.lockdown %>%
rename(c(NO2_concentration = China_NO2_2019.11.23_start, Longitude = x, Latitude = y)) %>%
select(c("Latitude", "Longitude", "NO2_concentration"))
after.lockdown <- raster('./China_NO2_comparison/China_NO2_2020-01-23_start.tif') %>%
flip(direction='y') %>%
rasterToPoints(spatial = TRUE) %>%
data.frame()
after.lockdown <- after.lockdown %>%
rename(c(NO2_concentration = China_NO2_2020.01.23_start, Longitude = x, Latitude = y)) %>%
select(c("Latitude", "Longitude", "NO2_concentration"))
#Plotting before and after maps.
p1 <- ggplot() +
geom_raster(data = before.lockdown , aes(x = Longitude, y = Latitude, fill = NO2_concentration)) +
geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="China", ], aes(x = long, y = lat, group = group), colour = "black", fill = NA) +
coord_quickmap() +
labs(fill = "NO2 Concentration \n(mol/m^2)") +
scale_fill_viridis_c(limits = c(0, 0.0005)) +
ggtitle("China NO2 Concentrations (Sentinel Satellite) Before COVID-19 Lockdown")
p2 <- ggplot() +
geom_raster(data = after.lockdown , aes(x = Longitude, y = Latitude, fill = NO2_concentration)) +
geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="China", ], aes(x = long, y = lat, group = group), colour = "black", fill = NA) +
coord_quickmap() +
labs(fill = "NO2 Concentration \n(mol/m^2)") +
scale_fill_viridis_c(limits = c(0, 0.0005)) +
ggtitle("China NO2 Concentrations (Sentinel Satellite) After COVID-19 Lockdown")
plot_grid(p1, p2, labels = "AUTO", ncol = 1)rm(p1, p2)#Function for finding medians over each time period.
China_NO2_median <- function(x) {
r <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="China", ])
lapply(r.vals, FUN=median, na.rm = TRUE)
}
China_NO2_median("China_NO2_2019-11-23_start.tif")## [[1]]
## [1] 1.576366e-05
China_NO2_median("China_NO2_2020-01-23_start.tif")## [[1]]
## [1] 1.231765e-05
#Function for finding averages over each time period.
China_NO2_average <- function(x) {
r <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="China", ])
lapply(r.vals, FUN=mean, na.rm = TRUE)
}
China_NO2_average("China_NO2_2019-11-23_start.tif")## [[1]]
## [1] 4.035193e-05
China_NO2_average("China_NO2_2020-01-23_start.tif") ## [[1]]
## [1] 2.19273e-05
#2019 violin plot
China_2019_violin <- function(x, y) {
r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Nov2018-Jan2019", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2019", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
df <- rbind(r1, r2)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('China NO2: 2019 Seasonal Change') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
scale_x_discrete(limits=c("Nov2018-Jan2019", "Jan-March 2019")) +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
China_2019_violin("China_NO2_2018-11-23_start.tif", "China_NO2_2019-01-23_start.tif")#2020 violin plot
China_2020_violin <- function(x, y) {
r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Nov2019-Jan2020", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2020", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
df <- rbind(r1, r2)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('China NO2 Before and After COVID-19') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
scale_x_discrete(limits=c("Nov2019-Jan2020", "Jan-March 2020")) +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
China_2020_violin("China_NO2_2019-11-23_start.tif", "China_NO2_2020-01-23_start.tif")#2021 violin plot
China_2021_violin <- function(x, y) {
r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Nov2020-Jan2021", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2021", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
df <- rbind(r1, r2)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('China NO2: 2021 Seasonal Change') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
scale_x_discrete(limits=c("Nov2020-Jan2021", "Jan-March 2021")) +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
China_2021_violin("China_NO2_2020-11-23_start.tif", "China_NO2_2021-01-23_start.tif")#All violin plots in 1 graph:
China_violin <- function(a, b, c, d, e, f) {
r1 <- raster(paste0("./China_NO2_Comparison/", (a))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r1 <- r1 %>% rasterToPoints() %>% data.frame()
vec <- rep("Nov2018-Jan2019", length.out = nrow(r1)) %>% as.data.frame()
r1 <- cbind(vec, r1)
names(r1)[1] <- "Timeframe"
names(r1)[4] <- "NO2_Concentration"
r2 <- raster(paste0("./China_NO2_Comparison/", (b))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- r2 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2019", length.out = nrow(r2)) %>% as.data.frame()
r2 <- cbind(vec, r2)
names(r2)[1] <- "Timeframe"
names(r2)[4] <- "NO2_Concentration"
r3 <- raster(paste0("./China_NO2_Comparison/", (c))) %>% flip(direction='y')
r3 <- crop(r3, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r3, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r3 <- r3 %>% rasterToPoints() %>% data.frame()
vec <- rep("Nov2019-Jan2020", length.out = nrow(r3)) %>% as.data.frame()
r3 <- cbind(vec, r3)
names(r3)[1] <- "Timeframe"
names(r3)[4] <- "NO2_Concentration"
r4 <- raster(paste0("./China_NO2_Comparison/", (d))) %>% flip(direction='y')
r4 <- crop(r4, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r4, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r4 <- r4 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2020", length.out = nrow(r4)) %>% as.data.frame()
r4 <- cbind(vec, r4)
names(r4)[1] <- "Timeframe"
names(r4)[4] <- "NO2_Concentration"
r5 <- raster(paste0("./China_NO2_Comparison/", (e))) %>% flip(direction='y')
r5 <- crop(r5, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r5, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r5 <- r5 %>% rasterToPoints() %>% data.frame()
vec <- rep("Nov2020-Jan2021", length.out = nrow(r5)) %>% as.data.frame()
r5 <- cbind(vec, r5)
names(r5)[1] <- "Timeframe"
names(r5)[4] <- "NO2_Concentration"
r6 <- raster(paste0("./China_NO2_Comparison/", (f))) %>% flip(direction='y')
r6 <- crop(r6, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r6, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r6 <- r6 %>% rasterToPoints() %>% data.frame()
vec <- rep("Jan-March 2021", length.out = nrow(r6)) %>% as.data.frame()
r6 <- cbind(vec, r6)
names(r6)[1] <- "Timeframe"
names(r6)[4] <- "NO2_Concentration"
df <- rbind(r1, r2, r3, r4, r5, r6)
ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) +
geom_violin(fill="gray") +
ggtitle('China NO2: 2019 Seasonal Change') +
ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
theme(axis.text.x = element_text(angle = 45)) +
scale_x_discrete(limits=c("Nov2018-Jan2019", "Jan-March 2019", "Nov2019-Jan2020", "Jan-March 2020", "Nov2020-Jan2021", "Jan-March 2021")) +
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
China_violin("China_NO2_2018-11-23_start.tif", "China_NO2_2019-01-23_start.tif", "China_NO2_2019-11-23_start.tif", "China_NO2_2020-01-23_start.tif", "China_NO2_2020-11-23_start.tif", "China_NO2_2021-01-23_start.tif")China_2019_change <- function(x, y) {
r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
s <- stack(r1, r2)
boxplot(s, col=c('red', 'blue'),
main='China NO2: 2019 Seasonal Change',
ylab='Date',
names = c('Nov2018-Jan2019', 'Jan-March 2019'))
}
China_2019_change("China_NO2_2018-11-23_start.tif", "China_NO2_2019-01-23_start.tif")China_2020_change <- function(x, y) {
r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
s <- stack(r1, r2)
boxplot(s, col=c('red', 'blue'),
main='China NO2 Before and After COVID-19',
ylab='Date',
names = c('Nov2019-Jan2020', 'Jan-March 2020'))
}
China_2020_change("China_NO2_2019-11-23_start.tif", "China_NO2_2020-01-23_start.tif")China_2021_change <- function(x, y) {
r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
s <- stack(r1, r2)
boxplot(s, col=c('red', 'blue'),
main='China NO2: 2021 Seasonal Change',
ylab='Date',
names = c('Nov2020-Jan2021', 'jan-March 2021'))
}
China_2021_change("China_NO2_2020-11-23_start.tif", "China_NO2_2021-01-23_start.tif")